---
title: "Variation in health system performance for managing diabetes in India"
author: "Jonas Prenissl"
date: "3/1/2019"
output: html_document
---




```{r Input}


# Data cleaning and merging
library(tidyverse) 
library(dplyr) 
library(forcats) # for categorical variables (R for data science rec) --> see https://rdrr.io/cran/forcats/man/fct_unify.html
library(stringr) # for manipulating string variables (R for data science rec)
#library(lubridate) # for dates and times (R for data science rec)
#instalibrary(dummies) # to easily create dummies
library(ggplot2) 
library(ggrepel) # to avoid text labels in ggplot from overlapping
library(modelr) # to use "add_predictions()" for adding a column of predicted vals to your dataset
library(broom) # to create tidy data from model output
#library(margins) # R equivalent of Stata's margins command --> Thomas Leeper said this only to be used for marginal effects (not prediction)
#library(prediction) # Thomas Leeper's R package to get predicted probabilities
library(srvyr)  # survey package that also works with dplyr 
library(lmtest) # for likelihood ratio tests
#library(sandwich) # for robust standard errors 
#library(multiwayvcov) # for clustered standard errors
library(miceadds) # package to cluster SEs more easily than in multiwayvcov; it uses multiwayvcov, so the results between the two packages are exactly the same. 
library(speedglm)
library(foreign) # for importing non-csv datasets
library(readstata13) # foreign only reads Stata 12 or lower
#library(lme4) # for multi-level modeling
#library(lmerTest) # for p-values with the lmer command
#library(sjPlot) # for plotting lmer models
#library(texreg) # for tables
library(tableone) # Creates a table 1 (summary characteristics)
#library(mice) # md.pattern() function to see patterns of missing data 
#library(reshape) # to use the rescalar function

#library(car) # for easy attaching of new variables
#library(arm)
#library(mosaic)
#library(mosaicData)
library(mediation)  # for mediation analysis
#library(lattice)
#library(pander)
library(foreign)
library(clusterSEs)
library(DataCombine)
 library(rms)




# Read the DHS India extract. The raw data frame stays available as
# `DHS.India.updated`; `dhs` is the tibble copy that the rest of the script
# works on.
DHS.India.updated <- read.csv(
  file = "~/Documents/Public Health Files/Public Health/public health/DHS.India.updated.csv"
)
dhs <- as_tibble(DHS.India.updated)


# The write.csv() calls further down use bare file names, so their output
# lands in this directory.
setwd(dir = "~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54")


## Keep only respondents younger than 50. Rows aged 50 or older -- and rows
## with a missing age -- are dropped.

dhs <- dplyr::filter(dhs, age < 50)

## Make new weights (p_wt_new) that adjust for the different number of women
## and men in the survey.
##
## For rows with sex == 0 (coded as male further down in this script) the
## original weight p_wt is multiplied by an age-specific factor
##     (a / b) / (c / d)
## taken from the table below, one row per single year of age (15-49).
## Every other row keeps p_wt unchanged. A row whose sex is missing and whose
## age is in the table gets NA.
## NOTE(review): a and c look like survey counts and b and d like population
## totals for the two sexes -- confirm against the source of these numbers.

male_reweight <- matrix(
  c(
    # age,     a,        b,    c,        d
    15, 25101, 12159708, 3761, 13739746,
    16, 24702, 11564358, 3844, 13027935,
    17, 22674,  9868018, 3725, 11349449,
    18, 25320, 12937296, 4177, 15020851,
    19, 19462, 10014673, 3203, 10844415,
    20, 23573, 13990570, 3632, 14892165,
    21, 18442,  9446694, 3028, 10532278,
    22, 22212, 11135249, 3472, 12392976,
    23, 19660,  9479866, 2989,  9674189,
    24, 19262,  9787150, 3061, 10093085,
    25, 25318, 13456554, 3821, 14311524,
    26, 19390,  9761967, 3131, 10315030,
    27, 18113,  8157318, 2957,  8552032,
    28, 22299, 11407090, 3413, 10719926,
    29, 15413,  7286828, 2476,  7445696,
    30, 27923, 14770033, 4356, 15628996,
    31, 13757,  6665743, 2190,  7157502,
    32, 20375,  8812439, 3156,  8801105,
    33, 14514,  6655662, 2299,  6108879,
    34, 14285,  7030400, 2348,  6964192,
    35, 27035, 13385965, 4391, 15036666,
    36, 15670,  7760149, 2435,  8067568,
    37, 13795,  5907352, 2176,  5784879,
    38, 18693,  9381357, 2723,  8090401,
    39, 12683,  5786480, 1968,  5939867,
    40, 25682, 13355581, 3974, 15173411,
    41, 11046,  5395597, 1819,  6172297,
    42, 16124,  6523816, 2544,  6856826,
    43, 11983,  4865438, 1797,  4468914,
    44, 10836,  4752294, 1714,  4873938,
    45, 23877, 11187786, 3854, 12685175,
    46, 11752,  5257138, 1895,  5735540,
    47, 11295,  3908175, 1714,  4043122,
    48, 14786,  6081038, 2119,  5568554,
    49, 10399,  3746076, 1506,  4105723
  ),
  ncol = 5, byrow = TRUE,
  dimnames = list(NULL, c("age", "a", "b", "c", "d"))
)

# One multiplier per table row, computed with the same arithmetic as before.
male_wt_factor <- (male_reweight[, "a"] / male_reweight[, "b"]) /
  (male_reweight[, "c"] / male_reweight[, "d"])

# Table row for each respondent's age; NA when the age is not in the table.
male_wt_row <- match(dhs$age, male_reweight[, "age"])

dhs <- mutate(
  dhs,
  p_wt_new = ifelse(
    sex == 0 & !is.na(male_wt_row),
    p_wt * male_wt_factor[male_wt_row],
    p_wt
  )
)

# Adjustment for men aged 50 or older. After the age < 50 filter above no row
# can satisfy this condition, so the statement currently leaves p_wt_new
# untouched; it is kept in case the older men are brought back in.
# (A preceding statement that divided p_wt_new by 1.013194 for these same rows
# was removed: this assignment starts again from p_wt and overwrote it.)
dhs <- mutate(
  dhs,
  p_wt_new = ifelse(
    sex == 0 & age >= 50,
    p_wt * ((6.72342 * 1.013194) / 0.998047),
    p_wt_new
  )
)



# 3. Drop pregnant respondents  ###################################################################################################################
# The age restriction below is disabled; only the pregnancy filter is active.
####dhs_nomiss <- dplyr::filter(dhs_nomiss, age> 18) # only those >18 and with non-missing age
# Author's note: glucose was not measured in pregnant women anyway, and
# dhs_nomiss has no missings in the pregnant variable.
# Rows where `pregnant` is NA are dropped as well (filter() keeps TRUE only).
dhs <- dhs %>%
  dplyr::filter(pregnant == 0)



## Define diabetics
##
## ex_diab_narrow_ind: 1 when the (rescaled) glucose reading is >= 200, or
##   >= 126 with fast == 1; 0 otherwise; NA when the reading is missing or the
##   fasting flag is missing for a reading in the 126-199 range.
## ex_diab_broad_ind: 1 when hbg12 == 1 or the narrow indicator is 1; NA when
##   either input is missing.
## NOTE(review): readings above 900 (glucose) and 49.95 (fbg) are blanked out,
## presumably as missing-value codes or implausible values, and the factor 1.11
## is presumably a measurement conversion -- confirm both.

dhs <- dhs %>%
  mutate(
    ex_glucose_ind = ifelse(
      is.na(ex_glucose_ind) | ex_glucose_ind > 900, NA, ex_glucose_ind
    ),
    fbg = ifelse(is.na(fbg) | fbg > 49.95, NA, fbg),
    # The 900 cut-off above is applied before this rescaling.
    ex_glucose_ind = ex_glucose_ind * 1.11,
    ex_diab_narrow_ind = as.factor(
      ifelse(
        ex_glucose_ind >= 200 | (fast == 1 & ex_glucose_ind >= 126),
        1,
        0
      )
    ),
    # The explicit NA guard is needed here: without it hbg12 == 1 would win
    # over a missing narrow indicator.
    ex_diab_broad_ind = as.factor(
      ifelse(
        is.na(ex_diab_narrow_ind) | is.na(hbg12),
        NA,
        ifelse(hbg12 == 1 | ex_diab_narrow_ind == 1, 1, 0)
      )
    )
  )


##### Analysis sample: drop rows where diabetes status cannot be determined,
##### i.e. rows without a glucose reading (ex_glucose_ind) or without a
##### fasting flag (fast).

dhs_nomiss <- dhs %>%
  filter(!is.na(ex_glucose_ind), !is.na(fast))


# Distribution of both diabetes indicators in the full (unfiltered) data.
summary(dhs$ex_diab_broad_ind)
summary(dhs$ex_diab_narrow_ind)

# Missing weights are replaced by the mean of the observed weights; the
# sex-specific copies hold the weight for one sex and NA for the other.

dhs_nomiss <- dhs_nomiss %>%
  dplyr::mutate(
    p_wt_new = ifelse(is.na(p_wt_new), mean(p_wt_new, na.rm = TRUE), p_wt_new),
    p_wt_newfemale = ifelse(sex == 1, p_wt_new, NA),
    p_wt_newmale = ifelse(sex == 0, p_wt_new, NA)
  )



# 9. Age-group factors in dhs #########################################################################################################
#
# age_grp / age_grp_large: right-closed age bands; the first level is the
#   reference level.
# age_grp_women / age_grp_men: the same bands restricted to one sex.
#   NOTE(review): rows of the other sex fall through to the catch-all level
#   (">49" resp. ">54") rather than NA; rows with missing sex become NA.

dhs <- dhs %>%
  dplyr::mutate(
    age_grp = cut(
      age,
      breaks = c(-Inf, 24, 34, 44, 49, Inf),
      labels = c("15-24", "25-34", "35-44", "45-49", "50-54")
    ),

    # large age groups
    age_grp_large = cut(
      age,
      breaks = c(-Inf, 27, 40, Inf),
      labels = c("15-27", "28-40", "41-54")
    ),

    # age group by sex
    age_grp_women = factor(
      ifelse(sex == 1 & age <= 24, "15-24",
        ifelse(sex == 1 & age > 24 & age <= 34, "25-34",
          ifelse(sex == 1 & age > 34 & age <= 44, "35-44",
            ifelse(sex == 1 & age > 44 & age <= 49, "45-49", ">49")
          )
        )
      ),
      levels = c("15-24", "25-34", "35-44", "45-49", ">49")
    ),
    age_grp_men = factor(
      ifelse(sex == 0 & age > 14 & age <= 24, "15-24",
        ifelse(sex == 0 & age > 24 & age <= 34, "25-34",
          ifelse(sex == 0 & age > 34 & age <= 44, "35-44",
            ifelse(sex == 0 & age > 44 & age <= 54, "45-54", ">54")
          )
        )
      ),
      levels = c("15-24", "25-34", "35-44", "45-54", ">54")
    )
  )

## Education groups
##
## Codes 0 and 1 are merged into 0 first; the label then groups 0-2 together.
## Any other non-missing code is labelled "Refused"; a missing code stays NA.
## The first factor level is the reference level.

dhs <- dhs %>%
  dplyr::mutate(
    educat = ifelse(educat == 0 | educat == 1, 0, educat),
    educatnames = factor(
      ifelse(educat == 0 | educat == 1 | educat == 2,
        "Primary schoon finished/unfinished",
        ifelse(educat == 3,
          "Secondary school unfinished",
          ifelse(educat == 4, "Secondary school or above", "Refused")
        )
      ),
      levels = c(
        "Primary schoon finished/unfinished",
        "Secondary school unfinished",
        "Secondary school or above",
        "Refused"
      )
    )
  )

## Marital-status labels
##
## Codes 1-7 map onto the labels below (in that order); any other non-missing
## code is labelled "Refused"; a missing code stays NA.
##
## The same label vector is used both for the recode and for the factor
## levels. Previously code 7 was recoded to a string that was not among the
## factor levels, so factor() silently turned every such row into NA.
marital_labels <- c(
  "Never married",
  "Currently married",
  "Separated",
  "Divorced",
  "Widowed",
  "Cohabitating",
  "divorced/separated/deserted/widowed"
)

dhs <- dplyr::mutate(
  dhs,
  maritalnames = ifelse(
    is.na(marital),
    NA,
    ifelse(marital %in% 1:7, marital_labels[match(marital, 1:7)], "Refused")
  )
)

# "Never married" is the first level and therefore the reference level.
dhs$maritalnames <- factor(dhs$maritalnames, levels = c(marital_labels, "Refused"))

### Married indicator in dhs
###
### married:      factor 1/0, 1 when marital == 2 (the "Currently married" code)
### marriednames: the same split as text, with "Not married" as reference level

dhs <- dhs %>%
  mutate(
    married = as.factor(ifelse(marital == 2, 1, 0)),
    marriednames = relevel(
      as.factor(ifelse(marital == 2, "Married", "Not married")),
      ref = "Not married"
    )
  )




# Five-year age bands [15,20), [20,25), ..., [50,55); ages outside 15-54 give
# NA. Bands that do not occur in the data are not kept as factor levels.
dhs <- dhs %>%
  mutate(
    age_grpOR = droplevels(
      cut(
        age,
        breaks = seq(15, 55, by = 5),
        right = FALSE,
        labels = c(
          "15-19", "20-24", "25-29", "30-34",
          "35-39", "40-44", "45-49", "50-54"
        )
      )
    )
  )





# Factor conversions and 0/1 indicator factors in dhs
#
# ex_dia_med_ind (diabetes medication) and wealth_quintile_rurb are converted
# to factors in place. rural is the complement of urban == 1; female is 1 for
# sex == 1 and male is its complement. Missing inputs stay NA.

dhs <- dhs %>%
  mutate(
    ex_dia_med_ind = as.factor(ex_dia_med_ind),
    wealth_quintile_rurb = as.factor(wealth_quintile_rurb),
    rural = as.factor(ifelse(urban == 1, 0, 1)),
    female = as.factor(ifelse(sex == 1, 1, 0)),
    male = as.factor(ifelse(sex == 1, 0, 1))
  )



# Human-readable label factors for sex, residence and wealth quintile in dhs.
# Missing inputs stay NA; quintile values other than 1-5 also become NA.
# "Q1 (Poorest)" is the reference level of the quintile label.
quintile_labels <- c("Q1 (Poorest)", "Q2", "Q3", "Q4", "Q5 (Richest)")

dhs <- dhs %>%
  mutate(
    female_lab = as.factor(ifelse(female == 1, "Female", "Male")),
    urban_lab = as.factor(ifelse(rural == 1, "Rural", "Urban")),
    wealth_quintile_rurb_lab = relevel(
      as.factor(
        quintile_labels[match(as.character(wealth_quintile_rurb), as.character(1:5))]
      ),
      ref = "Q1 (Poorest)"
    )
  )




# 9. Married label and age-group factors in dhs_nomiss ################################################################################
#
# marriednames is created here as plain text ("Married" when marital == 2).
# age_grp / age_grp_large: right-closed age bands; the first level is the
#   reference level.
# age_grp_women / age_grp_men: the same idea restricted to one sex.
#   NOTE(review): rows of the other sex fall through to the catch-all level
#   (">49" resp. ">54") rather than NA; rows with missing sex become NA.

dhs_nomiss <- dhs_nomiss %>%
  dplyr::mutate(
    marriednames = ifelse(marital == 2, "Married", "Not married"),

    # age groups
    age_grp = cut(
      age,
      breaks = c(-Inf, 24, 34, 44, Inf),
      labels = c("15-24", "25-34", "35-44", "45-54")
    ),

    # large age groups
    age_grp_large = cut(
      age,
      breaks = c(-Inf, 27, 40, Inf),
      labels = c("15-27", "28-40", "41-54")
    ),

    # age group per sex
    age_grp_women = factor(
      ifelse(sex == 1 & age <= 24, "15-24",
        ifelse(sex == 1 & age > 24 & age <= 34, "25-34",
          ifelse(sex == 1 & age > 34 & age <= 44, "35-44",
            ifelse(sex == 1 & age > 44 & age <= 49, "45-49", ">49")
          )
        )
      ),
      levels = c("15-24", "25-34", "35-44", "45-49", ">49")
    ),
    age_grp_men = factor(
      ifelse(sex == 0 & age > 14 & age <= 24, "15-24",
        ifelse(sex == 0 & age > 24 & age <= 34, "25-34",
          ifelse(sex == 0 & age > 34 & age <= 44, "35-44",
            ifelse(sex == 0 & age > 44 & age <= 54, "45-54", ">54")
          )
        )
      ),
      levels = c("15-24", "25-34", "35-44", "45-54", ">54")
    )
  )

### Age groups for the education analysis
###
### Right-closed bands starting above age 18; ages of 18 or below give NA.
### age_grp_educat stops at 49 (older ages give NA); age_grp_educat_men puts
### everything above 42 into its last band. The first level ("19-26") is the
### reference level.

dhs_nomiss <- dhs_nomiss %>%
  dplyr::mutate(
    age_grp_educat = cut(
      age,
      breaks = c(18, 26, 34, 42, 49),
      labels = c("19-26", "27-34", "35-42", "43-49")
    ),
    age_grp_educat_men = cut(
      age,
      breaks = c(18, 26, 34, 42, Inf),
      labels = c("19-26", "27-34", "35-42", "43-54")
    )
  )



#### Education groups in dhs_nomiss
####
#### Codes 0 and 1 are merged into 0 first; the label then groups 0-2 together.
#### Any other non-missing code is labelled "Refused"; a missing code stays NA.
#### The first factor level is the reference level.

dhs_nomiss <- dhs_nomiss %>%
  dplyr::mutate(
    educat = ifelse(educat == 0 | educat == 1, 0, educat),
    educatnames = factor(
      ifelse(educat == 0 | educat == 1 | educat == 2,
        "Primary schoon finished/unfinished",
        ifelse(educat == 3,
          "Secondary school unfinished",
          ifelse(educat == 4, "Secondary school or above", "Refused")
        )
      ),
      levels = c(
        "Primary schoon finished/unfinished",
        "Secondary school unfinished",
        "Secondary school or above",
        "Refused"
      )
    )
  )





### Marital-status labels in dhs_nomiss
###
### Codes 1-7 map onto the labels below (in that order); any other non-missing
### code is labelled "Refused"; a missing code stays NA.
###
### The same label vector is used both for the recode and for the factor
### levels. Previously code 7 was recoded to a string that was not among the
### factor levels, so factor() silently turned every such row into NA.
marital_labels <- c(
  "Never married",
  "Currently married",
  "Separated",
  "Divorced",
  "Widowed",
  "Cohabitating",
  "divorced/separated/deserted/widowed"
)

dhs_nomiss <- dplyr::mutate(
  dhs_nomiss,
  maritalnames = ifelse(
    is.na(marital),
    NA,
    ifelse(marital %in% 1:7, marital_labels[match(marital, 1:7)], "Refused")
  )
)

# "Never married" is the first level and therefore the reference level.
dhs_nomiss$maritalnames <- factor(dhs_nomiss$maritalnames, levels = c(marital_labels, "Refused"))




### Married indicator in dhs_nomiss
###
### married:      factor 1/0, 1 when marital == 2 (the "Currently married" code)
### marriednames: text version derived from married, with "Not married" as the
###               reference level

dhs_nomiss <- dhs_nomiss %>%
  mutate(
    married = as.factor(ifelse(marital == 2, 1, 0)),
    marriednames = relevel(
      as.factor(ifelse(married == 1, "Married", "Not married")),
      ref = "Not married"
    )
  )



# Five-year age bands [15,20), [20,25), ..., [50,55); ages outside 15-54 give
# NA. All eight bands are kept as factor levels whether or not they occur;
# "15-19" is the first and therefore the reference level.
dhs_nomiss <- dhs_nomiss %>%
  mutate(
    age_grpOR = cut(
      age,
      breaks = seq(15, 55, by = 5),
      right = FALSE,
      labels = c(
        "15-19", "20-24", "25-29", "30-34",
        "35-39", "40-44", "45-49", "50-54"
      )
    )
  )



### test if unique and number
#unique(dhs$d_id)
#length(unique(dhs$d_id))
#length(unique(dhs$psu))


# Factor conversions and 0/1 indicator factors in dhs_nomiss
#
# ex_dia_med_ind (diabetes medication) and wealth_quintile_rurb are converted
# to factors in place. rural is the complement of urban == 1; female is 1 for
# sex == 1 and male is its complement. Missing inputs stay NA.

dhs_nomiss <- dhs_nomiss %>%
  mutate(
    ex_dia_med_ind = as.factor(ex_dia_med_ind),
    wealth_quintile_rurb = as.factor(wealth_quintile_rurb),
    rural = as.factor(ifelse(urban == 1, 0, 1)),
    female = as.factor(ifelse(sex == 1, 1, 0)),
    male = as.factor(ifelse(sex == 1, 0, 1))
  )



# Human-readable label factors for sex, residence and wealth quintile in
# dhs_nomiss. Missing inputs stay NA; quintile values other than 1-5 also
# become NA. "Q1 (Poorest)" is the reference level of the quintile label.
quintile_labels <- c("Q1 (Poorest)", "Q2", "Q3", "Q4", "Q5 (Richest)")

dhs_nomiss <- dhs_nomiss %>%
  mutate(
    female_lab = as.factor(ifelse(female == 1, "Female", "Male")),
    urban_lab = as.factor(ifelse(rural == 1, "Rural", "Urban")),
    wealth_quintile_rurb_lab = relevel(
      as.factor(
        quintile_labels[match(as.character(wealth_quintile_rurb), as.character(1:5))]
      ),
      ref = "Q1 (Poorest)"
    )
  )



### Compare respondents with and without a usable diabetes measurement
###
### A row counts as "missing" when either the glucose reading or the fasting
### flag is NA. The summary table is stratified by that flag, printed, and the
### printed matrix is written to CSV.

dhs <- dhs %>%
  mutate(
    missing_diab_measure = ifelse(is.na(ex_glucose_ind) | is.na(fast), 1, 0),
    missing_diab_measure_names = ifelse(
      missing_diab_measure == 1, "missing", "not missing"
    )
  )

table_missing_diab_measure_names <- c(
  "female_lab",
  "ex_diab_broad_ind",
  "age_grpOR",
  "educatnames",
  "wealth_quintile_rurb_lab",
  "marriednames",
  "urban_lab"
)

stratbymissingdiabmeasure <- CreateTableOne(
  vars = table_missing_diab_measure_names,
  data = dhs,
  strata = "missing_diab_measure_names",
  includeNA = TRUE
)
missing <- print(stratbymissingdiabmeasure)

write.csv(missing, file = "missing 3.3.18.csv")

#stratbysexdiab <- CreateTableOne(vars = table_missing_diab_measure_names, data=dhs, strata=c("female_lab"), includeNA=T)
#print(stratbysexdiab)


# Summary tables for dhs_nomiss: one overall and one stratified by sex label.
# Each table is printed and the printed matrix is written to CSV.

# Treat the fasting flag as categorical in the tables.
dhs_nomiss$fast <- as.factor(dhs_nomiss$fast)

table1names <- c(
  "ex_diab_broad_ind",
  "age_grpOR",
  "fast",
  "educatnames",
  "wealth_quintile_rurb_lab",
  "marriednames",
  "urban_lab"
)

# Overall
totalwithmiss <- CreateTableOne(
  vars = table1names,
  data = dhs_nomiss,
  includeNA = TRUE
)
total <- print(totalwithmiss)

write.csv(total, file = "dhs_nomiss summary.csv")

# By sex
sexwithmiss <- CreateTableOne(
  vars = table1names,
  data = dhs_nomiss,
  strata = "female_lab",
  includeNA = TRUE
)
sexdhs_nomiss <- print(sexwithmiss)

write.csv(sexdhs_nomiss, file = "dhs_nomiss summary per sex.csv")



#ex_diab_broad_indwithmiss <- CreateTableOne(vars = table1names, data=dhs_nomiss, strata=c("ex_diab_broad_ind"), includeNA=T)
#print(ex_diab_broad_indwithmiss)


# Summary dhs

#table1names <- c("ex_diab_broad_ind", "age_grpOR",
#                 "age_grp_men", "age_grp_women", "educatnames", "wealth_quintile_rurb_lab", "marriednames", "urban_lab")
#"missing")



#totalwithmiss <- CreateTableOne(vars = table1names, data=dhs, includeNA=T)
#print(totalwithmiss)

#sexwithmiss <- CreateTableOne(vars = table1names, data=dhs, strata=c("female_lab"), includeNA=T)
#print(sexwithmiss)

#ex_diab_broad_indwithmiss <- CreateTableOne(vars = table1names, data=dhs_nomiss, strata=c("ex_diab_broad_ind"), includeNA=T)
#print(ex_diab_broad_indwithmiss)




# Stratum ID: the value of ex_state_ind and the rural flag joined with "_",
# stored as a factor. The two text helper columns are kept in the data.
dhs_nomiss <- dhs_nomiss %>%
  mutate(
    state_dist_str = as.character(ex_state_ind),
    rural_str = as.character(rural),
    stratumid = as.factor(str_c(state_dist_str, rural_str, sep = "_"))
  )




## Diabetic population only (broad definition). Rows whose broad indicator is
## NA are dropped as well.


dhs_nomiss_diabetic_only <- dhs_nomiss %>%
  filter(ex_diab_broad_ind == 1)


#table4names <- c( "hbg12" , "ex_dia_med_ind" , "ex_diab_narrow_ind" )



#totalwithmiss <- CreateTableOne(vars = table4names, data=dhs_nomiss_diabetic_only, includeNA=T)
#print(totalwithmiss)

##diabetic as numeric

dhs_nomiss <- mutate(dhs_nomiss,
              ex_diab_broad_ind_dbl = as.numeric(dhs_nomiss$ex_diab_broad_ind)-1)


####Check mean, median, 25th, 75th, min and max  in 1) a district, and 2) a PSU, if ex_diab_narrow_ind !=NA 

####check districts



#uniquestates<-unique(dhs_nomiss$ex_state_ind)

#keeptrack4<-NULL

#for(state in (uniquestates)){
 
#  keeptrack4<-c(keeptrack4,sum(dhs_nomiss$ex_state_ind==state))
#}
#names(keeptrack4)<-uniquestates
#summary(keeptrack4)

####check psus


#uniquepsu<-unique(dhs_nomiss_noNAinpsu$psu)

#keeptrack4<-NULL

#for(dis in (uniquepsu)){
  
  #keeptrack4<-c(keeptrack4,sum(dhs_nomiss_noNAinpsu$psu==dis))
#}
#names(keeptrack4)<-uniquepsu
#summary(keeptrack4)


########################Diabetic Subgroups#################################################

#filter diabetics only
# Keep only rows with ex_diab_broad_ind equal to 1.
dhs_nomiss_diabetic_only <- filter(dhs_nomiss, ex_diab_broad_ind == 1)


## Consistency check: paradoxically treated but UNAWARE patients, i.e. on
## medication (ex_dia_med_ind == 1) while hbg12 == 0 (this script treats
## hbg12 == 1 as "aware" further down). The result is NA wherever the
## condition cannot be evaluated because of missing inputs.
##
## This check used to be run twice; the second copy sat under a "controlled"
## heading but recomputed exactly the same column. The controlled-but-unaware
## check is done later in the file, once diabetic_controlled exists.
dhs_nomiss_diabetic_only <- mutate(
  dhs_nomiss_diabetic_only,
  treated_and_unaware = ifelse(hbg12 == 0 & ex_dia_med_ind == 1, 1, 0)
)
dhs_nomiss_diabetic_only$treated_and_unaware <-
  as.factor(dhs_nomiss_diabetic_only$treated_and_unaware)
summary(dhs_nomiss_diabetic_only$treated_and_unaware)




##aware diabetic as subset of diabetics



## Awareness among diabetics: 1 when hbg12 == 1, 0 otherwise, NA when hbg12 is
## missing.
##
## The factor is built with explicit levels c(0, 1) and the numeric copy is
## read back from the level labels. as.numeric(<factor>) returns level codes,
## so the previous `as.numeric(f) - 1` only produced 0/1 correctly while both
## values were present in the data.
dhs_nomiss_diabetic_only <- dhs_nomiss_diabetic_only %>%
  mutate(
    diabetic_aware = factor(ifelse(hbg12 == 1, 1, 0), levels = c(0, 1)),
    diabetic_aware_dbl = as.numeric(as.character(diabetic_aware))
  )

summary(dhs_nomiss_diabetic_only$diabetic_aware)

##treated diabetic as subset diabetics


## Treatment among diabetics: 1 when ex_dia_med_ind == 1, otherwise 0.
## A missing ex_dia_med_ind is counted as "not treated" (0), as before.
##
## Explicit factor levels plus conversion through the labels keep the numeric
## copy at 0/1 even if one of the two values never occurs;
## `as.numeric(f) - 1` would return level codes minus one instead.
dhs_nomiss_diabetic_only <- dhs_nomiss_diabetic_only %>%
  mutate(
    diabetic_treated = factor(
      ifelse(!is.na(ex_dia_med_ind) & ex_dia_med_ind == 1, 1, 0),
      levels = c(0, 1)
    ),
    diabetic_treated_dbl = as.numeric(as.character(diabetic_treated))
  )

summary(dhs_nomiss_diabetic_only$diabetic_treated)

##controlled diabetic as subset of  diabetics

## Control among diabetics: 1 when on medication (ex_dia_med_ind == 1) and
## ex_diab_narrow_ind == 0, otherwise 0.
##
## NOTE(review): unlike diabetic_treated, missing inputs are not forced to 0
## here -- the result is NA when the condition cannot be evaluated (e.g.
## ex_dia_med_ind is NA and ex_diab_narrow_ind == 0). Confirm that the two
## cascade steps are meant to use different denominators.
##
## Explicit factor levels plus conversion through the labels keep the numeric
## copy at 0/1 even if one of the two values never occurs;
## `as.numeric(f) - 1` would return level codes minus one instead.
dhs_nomiss_diabetic_only <- dhs_nomiss_diabetic_only %>%
  mutate(
    diabetic_controlled = factor(
      ifelse(ex_dia_med_ind == 1 & ex_diab_narrow_ind == 0, 1, 0),
      levels = c(0, 1)
    ),
    diabetic_controlled_dbl = as.numeric(as.character(diabetic_controlled))
  )

summary(dhs_nomiss_diabetic_only$diabetic_controlled)

## Consistency check: paradoxically controlled but UNAWARE patients
## (diabetic_controlled == 1 while diabetic_aware == 0).
dhs_nomiss_diabetic_only <- dhs_nomiss_diabetic_only %>%
  mutate(
    controlled_and_unaware = ifelse(
      diabetic_controlled == 1 & diabetic_aware == 0, 1, 0
    )
  )
dhs_nomiss_diabetic_only$controlled_and_unaware <-
  as.factor(dhs_nomiss_diabetic_only$controlled_and_unaware)
summary(dhs_nomiss_diabetic_only$controlled_and_unaware)

###create Barchart variables with separate subsets###################################################


##get aware diabetics

#table6names <- c( "hbg12", "ex_dia_med_ind") ## hbg12=0 are aware diabetics


#totalwithmiss <- CreateTableOne(vars = table6names, data=dhs_nomiss_diabetic_only, includeNA=T)
#print(totalwithmiss)

#filter diabetic,aware

# Diabetics with hbg12 == 1 (the value this script treats as "aware").
dhs_nomiss_diabetic_and_aware_only <- dhs_nomiss_diabetic_only %>%
  filter(hbg12 == 1)

#get treated diabetics of aware diabetics

#table7names <- c("ex_dia_med_ind") ###ex_dia_med_ind are treated diabetics


#totalwithmiss <- CreateTableOne(vars = table7names, data=dhs_nomiss_diabetic_and_aware_only, includeNA=T)
#print(totalwithmiss)

# Aware diabetics who are also on medication (ex_dia_med_ind == 1). Filtering
# the aware subset selects the same rows as applying both conditions to
# dhs_nomiss_diabetic_only at once.
dhs_nomiss_diabetic_and_treated_only <- dhs_nomiss_diabetic_and_aware_only %>%
  filter(ex_dia_med_ind == 1)

#get controlled of diabetic,aware,treated

#table8names <- c("ex_diab_narrow_ind" ) ####ex_diab_narrow_ind are controlled diabetics


#totalwithmiss <- CreateTableOne(vars = table8names, data=dhs_nomiss_diabetic_and_treated_only, includeNA=T)
#print(totalwithmiss)


# Numeric copy of the diabetes indicator (recomputed; the same column is also
# created earlier in the file).
dhs_nomiss <- dhs_nomiss %>%
  mutate(ex_diab_broad_ind_dbl = as.numeric(ex_diab_broad_ind) - 1)




####Create PSUS for subsets besides dhs, always if working with psu, use dataset with NAinpsu filtered for psu==NA

####NAinpsu for dhs_nomiss

# psu == 1 is recoded to missing in the full analysis sample.
dhs_nomiss <- dhs_nomiss %>%
  mutate(psu = ifelse(psu == 1, NA, psu))
summary(dhs_nomiss$psu)

# Copy of the sample restricted to rows with a non-missing psu.
dhs_nomiss_noNAinpsu <- dhs_nomiss %>%
  filter(!is.na(psu))
summary(dhs_nomiss_noNAinpsu$psu)

# PSU ID ("<state_dist_str>_<psu>") in the restricted copy ...
dhs_nomiss_noNAinpsu <- mutate(
  dhs_nomiss_noNAinpsu,
  psu_str = as.character(psu),
  psuid = str_c(state_dist_str, psu_str, sep = "_")
)
dhs_nomiss_noNAinpsu$psuid <- as.factor(dhs_nomiss_noNAinpsu$psuid)

# ... and in the full sample.
dhs_nomiss <- mutate(
  dhs_nomiss,
  psu_str = as.character(psu),
  psuid = str_c(state_dist_str, psu_str, sep = "_")
)
dhs_nomiss$psuid <- as.factor(dhs_nomiss$psuid)


###create PSU ID in dhs_nomiss_diabetic_only :
# PSU ID ("<state_dist_str>_<psu>", stored as a factor) for the three diabetic
# subsets.
# NOTE(review): these subsets were filtered out of dhs_nomiss before its
# psu == 1 -> NA recode, and their own recode only happens further down, so
# psu_str/psuid built here still appear to carry psu value 1 (unlike
# dhs_nomiss) -- confirm this ordering is intended.

# All diabetics.
dhs_nomiss_diabetic_only <- mutate(
  dhs_nomiss_diabetic_only,
  psu_str = as.character(psu),
  psuid = str_c(state_dist_str, psu_str, sep = "_")
)
dhs_nomiss_diabetic_only$psuid <-
  as.factor(dhs_nomiss_diabetic_only$psuid)

# Aware diabetics.
dhs_nomiss_diabetic_and_aware_only <- mutate(
  dhs_nomiss_diabetic_and_aware_only,
  psu_str = as.character(psu),
  psuid = str_c(state_dist_str, psu_str, sep = "_")
)
dhs_nomiss_diabetic_and_aware_only$psuid <-
  as.factor(dhs_nomiss_diabetic_and_aware_only$psuid)

# Aware and treated diabetics.
dhs_nomiss_diabetic_and_treated_only <- mutate(
  dhs_nomiss_diabetic_and_treated_only,
  psu_str = as.character(psu),
  psuid = str_c(state_dist_str, psu_str, sep = "_")
)
dhs_nomiss_diabetic_and_treated_only$psuid <-
  as.factor(dhs_nomiss_diabetic_and_treated_only$psuid)




###Nainpsu for dhs_nomiss_diabetic_only

# For each diabetic subset: recode psu == 1 to NA, keep a copy without missing
# psu (`*_noNAinpsu`), and print summaries plus an NA count of that copy as a
# sanity check (the count is expected to be 0).

### All diabetics
dhs_nomiss_diabetic_only <- dhs_nomiss_diabetic_only %>%
  mutate(psu = ifelse(psu == 1, NA, psu))
summary(dhs_nomiss_diabetic_only$psu)

dhs_nomiss_diabetic_only_noNAinpsu <- dhs_nomiss_diabetic_only %>%
  filter(!is.na(psu))
summary(dhs_nomiss_diabetic_only_noNAinpsu$psu)

# Checks the filtered diabetic subset, like the two sections below; this line
# previously counted NAs in dhs_nomiss$psu instead.
sum(is.na(dhs_nomiss_diabetic_only_noNAinpsu$psu))

### Aware diabetics
dhs_nomiss_diabetic_and_aware_only <- dhs_nomiss_diabetic_and_aware_only %>%
  mutate(psu = ifelse(psu == 1, NA, psu))
summary(dhs_nomiss_diabetic_and_aware_only$psu)

dhs_nomiss_diabetic_and_aware_only_noNAinpsu <-
  dhs_nomiss_diabetic_and_aware_only %>%
  filter(!is.na(psu))
summary(dhs_nomiss_diabetic_and_aware_only_noNAinpsu$psu)

sum(is.na(dhs_nomiss_diabetic_and_aware_only_noNAinpsu$psu))

### Aware and treated diabetics
dhs_nomiss_diabetic_and_treated_only <- dhs_nomiss_diabetic_and_treated_only %>%
  mutate(psu = ifelse(psu == 1, NA, psu))
summary(dhs_nomiss_diabetic_and_treated_only$psu)

dhs_nomiss_diabetic_and_treated_only_noNAinpsu <-
  dhs_nomiss_diabetic_and_treated_only %>%
  filter(!is.na(psu))
summary(dhs_nomiss_diabetic_and_treated_only_noNAinpsu$psu)

sum(is.na(dhs_nomiss_diabetic_and_treated_only_noNAinpsu$psu))





# Five-year age bands for the diabetic subset, as a factor whose first level
# (the reference) is "15-19". Intervals are closed on the left: [15, 20),
# [20, 25), ..., [50, 55). Ages outside 15-54 and missing ages become NA; the
# original nested ifelse() labelled them "54+" and then dropped that label by
# leaving it out of the factor levels, which has the same result.
dhs_nomiss_diabetic_only <- dhs_nomiss_diabetic_only %>%
  mutate(
    age_grpOR = cut(
      age,
      breaks = seq(15, 55, by = 5),
      labels = c(
        "15-19", "20-24", "25-29", "30-34",
        "35-39", "40-44", "45-49", "50-54"
      ),
      right = FALSE
    )
  )

  # Rural / urban splits of the full sample and of the diabetic subset.
  dhs_nomiss_rural <- dhs_nomiss %>% filter(urban_lab == "Rural")
  dhs_nomiss_urban <- dhs_nomiss %>% filter(urban_lab == "Urban")
  dhs_nomiss_diabetic_only_rural <- dhs_nomiss_diabetic_only %>%
    filter(urban_lab == "Rural")
  dhs_nomiss_diabetic_only_urban <- dhs_nomiss_diabetic_only %>%
    filter(urban_lab == "Urban")

  # Sex-specific splits; object names pair sex == 1 with women and sex == 0
  # with men. Each one filters the matching rural/urban subset, which selects
  # the same rows as applying both conditions at once.
  dhs_nomiss_diabetic_only_rural_women <- dhs_nomiss_diabetic_only_rural %>%
    filter(sex == 1)
  dhs_nomiss_diabetic_only_rural_men <- dhs_nomiss_diabetic_only_rural %>%
    filter(sex == 0)

  dhs_nomiss_diabetic_only_urban_women <- dhs_nomiss_diabetic_only_urban %>%
    filter(sex == 1)
  dhs_nomiss_diabetic_only_urban_men <- dhs_nomiss_diabetic_only_urban %>%
    filter(sex == 0)

  dhs_nomiss_diabetic_only_women <- dhs_nomiss_diabetic_only %>%
    filter(sex == 1)
  dhs_nomiss_diabetic_only_men <- dhs_nomiss_diabetic_only %>%
    filter(sex == 0)
  
  
  #merge age standardization weight from GBD India pop into the dataset
# Extend the 5-year age code: ages 65-69, 70-74, 75-79 and 80+ get codes
# 11-14; every other row keeps its existing age_5yr value.
dhs_nomiss <- dhs_nomiss %>%
  mutate(
    age_5yr_2 = ifelse(age >= 65 & age <= 69, 11,
      ifelse(age >= 70 & age <= 74, 12,
        ifelse(age >= 75 & age <= 79, 13,
          ifelse(age >= 80, 14, age_5yr)
        )
      )
    )
  )

# GBD population weights.
# NOTE(review): absolute, machine-specific path -- the chunk only runs where
# this file exists at exactly this location.
GBDpopweights_2018.04.24.age_grp15.19 <- read.csv("~/iCloud Drive (Archive)/Documents/Public Health Files/Public Health/Multiple Morbidities/Datasets/GBDpopweights_2018-04-24-age_grp15-19.csv")

agest_india <- GBDpopweights_2018.04.24.age_grp15.19
agest_india$sex <- as.factor(agest_india$sex)
dhs_nomiss$sex <- as.factor(dhs_nomiss$sex)

# Join key on the survey side: age_grpOR copied into age_grp2 as a factor.
dhs_nomiss <- dhs_nomiss %>%
  mutate(age_grp2 = as.factor(age_grpOR))

# Keep rows 1-7 and 15-22 of the weights table.
# NOTE(review): which age/sex cells these positions are depends on the CSV
# layout -- confirm against the file.
agest_india <- agest_india[c(1:7, 15:22), ]

# Coerce to factor before dropping unused levels: since R 4.0 read.csv()
# returns character columns by default and droplevels() has no method for
# them. On a column that is already a factor as.factor() changes nothing.
agest_india$age_grp2 <- droplevels(as.factor(agest_india$age_grp2))

# NOTE(review): no `by` is given, so the join uses every column name the two
# tables share (presumably sex and age_grp2) -- confirm.
dhs_nomiss <- left_join(dhs_nomiss, agest_india)

# Age-standardised weight = survey weight x GBD weight; rows without a GBD
# weight receive the mean of the weights that could be computed.
dhs_nomiss <- mutate(
  dhs_nomiss,
  sworld_weight_india = p_wt_new * gbd_weight
)
dhs_nomiss <- mutate(
  dhs_nomiss,
  sworld_weight_india = ifelse(
    is.na(gbd_weight),
    mean(sworld_weight_india, na.rm = TRUE),
    sworld_weight_india
  )
)
  
  

```



```{r heatmap Diabetes men women}
  
# Diabetes prevalence heatmaps (wealth quintile x age group, faceted by
# urban_lab), one for sex == 0 (men) and one for sex == 1 (women).

# Theme settings shared by both heatmaps in this chunk.
heatmap_theme <- theme(
  axis.text.x = element_text(size = 15, face = "bold", family = "Times", angle = 45, hjust = 1),
  axis.text.y = element_text(size = 15, face = "bold", family = "Times"),
  axis.title = element_text(size = 22, face = "italic", family = "Times"),
  legend.title = element_blank(),
  legend.text = element_text(size = 18, family = "Times"),
  strip.text = element_text(size = 18, family = "Times", face = "bold"),
  panel.spacing = unit(2, "lines"),
  axis.title.x = element_text(margin = margin(t = 20), family = "Times"),
  axis.title.y = element_text(margin = margin(r = 20), family = "Times"),
  plot.title = element_text(size = 25, hjust = 0.5, family = "Times", face = "bold")
)

##### Men only
# The men-only subset is created FIRST and age_grp_men is derived on it.
# Previously the recode ran on dhs_nomiss_men_only before that object was
# (re)created from dhs_nomiss, so it was thrown away by the later filter (and
# failed outright in a session where the object did not exist yet).
dhs_nomiss_men_only <- filter(dhs_nomiss, sex == 0)
dhs_nomiss_men_only <- dplyr::mutate(
  dhs_nomiss_men_only,
  age_grp_men = factor(
    ifelse(age <= 24, "15-24",
      ifelse(age > 24 & age <= 34, "25-34",
        ifelse(age > 34 & age <= 44, "35-44",
          ifelse(age > 44 & age <= 49, "45-49", ">49")
        )
      )
    ),
    # Explicit level order; "15-24" comes first and is the reference level.
    levels = c("15-24", "25-34", "35-44", "45-49", ">49")
  )
)

# One row per cell, carrying the weighted prevalence in percent.
diab_men_heatdat <- dhs_nomiss_men_only %>%
  filter(!is.na(wealth_quintile_rurb) & !is.na(age)) %>%
  group_by(female_lab, wealth_quintile_rurb, age_grp_men, urban) %>%
  mutate(diab_indiv = 100 * weighted.mean(ex_diab_broad_ind_dbl, p_wt_new, na.rm = TRUE)) %>%
  filter(row_number() == 1) %>%
  dplyr::select(wealth_quintile_rurb_lab, female_lab, age_grp_men, urban_lab, diab_indiv, wealth_quintile_rurb, urban)

diab_men_wealth_heat <- ggplot(data = diab_men_heatdat, aes(x = wealth_quintile_rurb_lab, y = age_grp_men)) +
  geom_tile(aes(fill = diab_indiv)) +
  geom_text(aes(label = sprintf("%1.1f", diab_indiv)), size = 5) +
  facet_grid(~urban_lab) +
  scale_fill_distiller(palette = "RdYlGn", direction = -1) +
  theme_classic() +
  labs(x = "Household Wealth Quintile", y = "Age Group, y", fill = "") +
  heatmap_theme +
  coord_fixed(5 / 4)
diab_men_wealth_heat

# Copy what is on the active graphics device to a PDF.
dev.copy(pdf, 'diab men.pdf')
dev.off()


##### Women only
# age_grp_women is not created in this chunk; it is expected to exist in
# dhs_nomiss already.
dhs_nomiss_women_only <- filter(dhs_nomiss, sex == 1)
diab_women_heatdat <- dhs_nomiss_women_only %>%
  filter(!is.na(wealth_quintile_rurb) & !is.na(age)) %>%
  group_by(female_lab, wealth_quintile_rurb, age_grp_women, urban) %>%
  mutate(diab_indiv = 100 * weighted.mean(ex_diab_broad_ind_dbl, p_wt_new, na.rm = TRUE)) %>%
  filter(row_number() == 1) %>%
  dplyr::select(wealth_quintile_rurb_lab, female_lab, age_grp_women, urban_lab, diab_indiv, wealth_quintile_rurb, urban)

diab_women_wealth_heat <- ggplot(data = diab_women_heatdat, aes(x = wealth_quintile_rurb_lab, y = age_grp_women)) +
  geom_tile(aes(fill = diab_indiv)) +
  geom_text(aes(label = sprintf("%1.1f", diab_indiv)), size = 5) +
  facet_grid(~urban_lab) +
  scale_fill_distiller(palette = "RdYlGn", direction = -1) +
  theme_classic() +
  labs(x = "Household Wealth Quintile", y = "Age Group, y", fill = "") +
  heatmap_theme +
  coord_fixed(5 / 4)
diab_women_wealth_heat

dev.copy(pdf, 'diab women.pdf')
dev.off()


```

```{r heatmap cascade steps wealth}

###heatmap aware diabetes AS SUBSET

##### aware

# Care-cascade heatmaps among diabetics (both sexes): weighted percentage
# aware / treated / controlled per wealth quintile x age group cell.
# urban_lab and urban are selected but are not grouping variables, so each
# cell simply carries the values of its first row.

# Theme settings shared by the three heatmaps in this chunk.
heatmap_theme <- theme(
  axis.text.x = element_text(size = 15, face = "bold", family = "Times", angle = 45, hjust = 1),
  axis.text.y = element_text(size = 15, face = "bold", family = "Times"),
  axis.title = element_text(size = 22, face = "italic", family = "Times"),
  legend.title = element_blank(),
  legend.text = element_text(size = 18, family = "Times"),
  strip.text = element_text(size = 18, family = "Times", face = "bold"),
  panel.spacing = unit(2, "lines"),
  axis.title.x = element_text(margin = margin(t = 20), family = "Times"),
  axis.title.y = element_text(margin = margin(r = 20), family = "Times"),
  plot.title = element_text(size = 25, hjust = 0.5, family = "Times", face = "bold")
)

##### aware
dhs_nomiss_diab_only <- dhs_nomiss_diabetic_only
aware_diab_women_heatdat <- dhs_nomiss_diab_only %>%
  filter(!is.na(wealth_quintile_rurb), !is.na(age)) %>%
  group_by(wealth_quintile_rurb, age_grp) %>%
  mutate(aware_diab_indiv = 100 * weighted.mean(diabetic_aware_dbl, p_wt_new, na.rm = TRUE)) %>%
  filter(row_number() == 1) %>%
  dplyr::select(wealth_quintile_rurb_lab, age_grp, urban_lab, aware_diab_indiv, wealth_quintile_rurb, urban)

aware_diab_women_wealth_heat <- ggplot(data = aware_diab_women_heatdat, aes(x = wealth_quintile_rurb_lab, y = age_grp)) +
  geom_tile(aes(fill = aware_diab_indiv)) +
  geom_text(aes(label = sprintf("%1.1f", aware_diab_indiv)), size = 5) +
  scale_fill_distiller(palette = "RdYlGn", direction = 1) +
  theme_classic() +
  labs(x = "Household Wealth Quintile", y = "Age Group, y", fill = "") +
  heatmap_theme +
  coord_fixed(5 / 4)
aware_diab_women_wealth_heat

dev.copy(pdf, 'aware 25-3.pdf')
dev.off()


##### treated
# NOTE(review): despite its name, dhs_nomiss_women_only is overwritten here
# with ALL diabetics (both sexes); code that runs afterwards and expects
# women-only data in this object will not get it -- confirm nothing does.
dhs_nomiss_women_only <- dhs_nomiss_diabetic_only
treated_diab_women_heatdat <- dhs_nomiss_women_only %>%
  filter(!is.na(wealth_quintile_rurb), !is.na(age)) %>%
  group_by(wealth_quintile_rurb, age_grp) %>%
  mutate(treated_diab_indiv = 100 * weighted.mean(diabetic_treated_dbl, p_wt_new, na.rm = TRUE)) %>%
  filter(row_number() == 1) %>%
  dplyr::select(wealth_quintile_rurb_lab, age_grp, urban_lab, treated_diab_indiv, wealth_quintile_rurb, urban)

treated_diab_women_wealth_heat <- ggplot(data = treated_diab_women_heatdat, aes(x = wealth_quintile_rurb_lab, y = age_grp)) +
  geom_tile(aes(fill = treated_diab_indiv)) +
  geom_text(aes(label = sprintf("%1.1f", treated_diab_indiv)), size = 5) +
  scale_fill_distiller(palette = "RdYlGn", direction = 1) +
  theme_classic() +
  labs(x = "Household Wealth Quintile", y = "Age Group, y", fill = "") +
  heatmap_theme +
  coord_fixed(5 / 4)
treated_diab_women_wealth_heat

dev.copy(pdf, 'treated 25-3.pdf')
dev.off()


##### controlled (among all diabetics)
dhs_nomiss_women_only <- dhs_nomiss_diabetic_only
controlled_diab_women_heatdat <- dhs_nomiss_women_only %>%
  filter(!is.na(wealth_quintile_rurb), !is.na(age)) %>%
  group_by(wealth_quintile_rurb, age_grp) %>%
  mutate(controlled_diab_indiv = 100 * weighted.mean(diabetic_controlled_dbl, p_wt_new, na.rm = TRUE)) %>%
  filter(row_number() == 1) %>%
  dplyr::select(wealth_quintile_rurb_lab, age_grp, urban_lab, controlled_diab_indiv, wealth_quintile_rurb, urban)

controlled_diab_women_wealth_heat <- ggplot(data = controlled_diab_women_heatdat, aes(x = wealth_quintile_rurb_lab, y = age_grp)) +
  geom_tile(aes(fill = controlled_diab_indiv)) +
  geom_text(aes(label = sprintf("%1.1f", controlled_diab_indiv)), size = 5) +
  scale_fill_distiller(palette = "RdYlGn", direction = 1) +
  theme_classic() +
  labs(x = "Household Wealth Quintile", y = "Age Group, y", fill = "") +
  heatmap_theme +
  coord_fixed(5 / 4)
controlled_diab_women_wealth_heat

dev.copy(pdf, 'controlled 25-3.pdf')
dev.off()

```


```{r heatmap education men women only}

#Education#

###heatmap Diabetes Education

#####Men only Diabetes EDUCATION

# Diabetes prevalence heatmaps by education x age group, faceted by urban_lab:
# first for sex == 0 (saved as 'educat diab men.pdf'), then for sex == 1.
# NOTE(review): the men's pass reuses the *_women_* object names; they hold
# men's data until the women's pass below overwrites them.

# Theme settings shared by both heatmaps in this chunk.
heatmap_theme <- theme(
  axis.text.x = element_text(size = 15, face = "bold", family = "Times", angle = 45, hjust = 1),
  axis.text.y = element_text(size = 15, face = "bold", family = "Times"),
  axis.title = element_text(size = 22, face = "italic", family = "Times"),
  legend.title = element_blank(),
  legend.text = element_text(size = 18, family = "Times"),
  strip.text = element_text(size = 18, family = "Times", face = "bold"),
  panel.spacing = unit(2, "lines"),
  axis.title.x = element_text(margin = margin(t = 20), family = "Times"),
  axis.title.y = element_text(margin = margin(r = 20), family = "Times"),
  plot.title = element_text(size = 25, hjust = 0.5, family = "Times", face = "bold")
)

##### Men (sex == 0)
dhs_nomiss_women_only <- dhs_nomiss %>% filter(sex == 0)
diab_women_heatdat <- dhs_nomiss_women_only %>%
  filter(!is.na(educat), !is.na(age_grp)) %>%
  group_by(educat, age_grp, urban) %>%
  mutate(diab_indiv = 100 * weighted.mean(ex_diab_broad_ind_dbl, p_wt_new, na.rm = TRUE)) %>%
  filter(row_number() == 1) %>%
  dplyr::select(educatnames, age_grp, urban_lab, diab_indiv, educat, urban)

diab_women_wealth_heat <- ggplot(data = diab_women_heatdat, aes(x = educatnames, y = age_grp)) +
  geom_tile(aes(fill = diab_indiv)) +
  geom_text(aes(label = sprintf("%1.1f", diab_indiv)), size = 5) +
  facet_grid(. ~ urban_lab) +
  scale_fill_distiller(palette = "RdYlGn", direction = -1) +
  theme_classic() +
  labs(x = "Education", y = "Age Group, y", fill = "") +
  heatmap_theme +
  coord_fixed(4 / 4)
diab_women_wealth_heat

dev.copy(pdf, 'educat diab men.pdf')
dev.off()


##### Women (sex == 1)
dhs_nomiss_women_only <- dhs_nomiss %>% filter(sex == 1)
diab_women_heatdat <- dhs_nomiss_women_only %>%
  filter(!is.na(educat), !is.na(age_grp)) %>%
  group_by(educat, age_grp, urban) %>%
  mutate(diab_indiv = 100 * weighted.mean(ex_diab_broad_ind_dbl, p_wt_new, na.rm = TRUE)) %>%
  filter(row_number() == 1) %>%
  dplyr::select(educatnames, age_grp, urban_lab, diab_indiv, educat, urban)

diab_women_wealth_heat <- ggplot(data = diab_women_heatdat, aes(x = educatnames, y = age_grp)) +
  geom_tile(aes(fill = diab_indiv)) +
  geom_text(aes(label = sprintf("%1.1f", diab_indiv)), size = 5) +
  facet_grid(. ~ urban_lab) +
  scale_fill_distiller(palette = "RdYlGn", direction = -1) +
  theme_classic() +
  labs(x = "Education", y = "Age Group, y", fill = "") +
  heatmap_theme +
  coord_fixed(4 / 4)
diab_women_wealth_heat

dev.copy(pdf, 'educat diab women.pdf')
dev.off()

```


```{r heatmap care steps education}

###heatmap aware diabetes AS SUBSET EDUCATION

#####aware EDUCATION

# Care-cascade heatmaps among diabetics (both sexes): weighted percentage
# aware / treated / controlled per education x age_grp_educat_men cell.
# NOTE(review): dhs_nomiss_women_only and dhs_nomiss_men_only are overwritten
# below with ALL diabetics; after this chunk neither object is sex-specific.

# Theme settings shared by the three heatmaps in this chunk.
heatmap_theme <- theme(
  axis.text.x = element_text(size = 15, face = "bold", family = "Times", angle = 45, hjust = 1),
  axis.text.y = element_text(size = 15, face = "bold", family = "Times"),
  axis.title = element_text(size = 22, face = "italic", family = "Times"),
  legend.title = element_blank(),
  legend.text = element_text(size = 18, family = "Times"),
  strip.text = element_text(size = 18, family = "Times", face = "bold"),
  panel.spacing = unit(2, "lines"),
  axis.title.x = element_text(margin = margin(t = 20), family = "Times"),
  axis.title.y = element_text(margin = margin(r = 20), family = "Times"),
  plot.title = element_text(size = 25, hjust = 0.5, family = "Times", face = "bold")
)

##### aware
dhs_nomiss_women_only <- dhs_nomiss_diabetic_only
aware_diab_women_heatdat <- dhs_nomiss_women_only %>%
  filter(!is.na(educat), !is.na(age_grp_educat_men)) %>%
  group_by(educat, age_grp_educat_men) %>%
  mutate(aware_diab_indiv = 100 * weighted.mean(diabetic_aware_dbl, p_wt_new, na.rm = TRUE)) %>%
  filter(row_number() == 1) %>%
  dplyr::select(educat, age_grp_educat_men, urban_lab, aware_diab_indiv, educatnames, urban)

aware_diab_women_wealth_heat <- ggplot(data = aware_diab_women_heatdat, aes(x = educatnames, y = age_grp_educat_men)) +
  geom_tile(aes(fill = aware_diab_indiv)) +
  geom_text(aes(label = sprintf("%1.1f", aware_diab_indiv)), size = 5) +
  scale_fill_distiller(palette = "RdYlGn", direction = 1) +
  theme_classic() +
  labs(x = "Education", y = "Age Group, y", fill = "") +
  heatmap_theme +
  coord_fixed(5 / 4)
aware_diab_women_wealth_heat

dev.copy(pdf, 'educat aware 3-25 until 54.pdf')
dev.off()


##### treated
dhs_nomiss_men_only <- dhs_nomiss_diabetic_only
treated_diab_men_heatdat <- dhs_nomiss_men_only %>%
  filter(!is.na(educat), !is.na(age_grp_educat_men)) %>%
  group_by(educat, age_grp_educat_men) %>%
  mutate(treated_diab_indiv = 100 * weighted.mean(diabetic_treated_dbl, p_wt_new, na.rm = TRUE)) %>%
  filter(row_number() == 1) %>%
  dplyr::select(educat, age_grp_educat_men, urban_lab, treated_diab_indiv, educatnames, urban)

treated_diab_men_wealth_heat <- ggplot(data = treated_diab_men_heatdat, aes(x = educatnames, y = age_grp_educat_men)) +
  geom_tile(aes(fill = treated_diab_indiv)) +
  geom_text(aes(label = sprintf("%1.1f", treated_diab_indiv)), size = 5) +
  scale_fill_distiller(palette = "RdYlGn", direction = 1) +
  theme_classic() +
  labs(x = "Education", y = "Age Group, y", fill = "") +
  heatmap_theme +
  coord_fixed(5 / 4)
treated_diab_men_wealth_heat

dev.copy(pdf, 'educat treated 3-25 until 54.pdf')
dev.off()


##### controlled
dhs_nomiss_women_only <- dhs_nomiss_diabetic_only
controlled_diab_women_heatdat <- dhs_nomiss_women_only %>%
  filter(!is.na(educat), !is.na(age_grp_educat_men)) %>%
  group_by(educat, age_grp_educat_men) %>%
  mutate(controlled_diab_indiv = 100 * weighted.mean(diabetic_controlled_dbl, p_wt_new, na.rm = TRUE)) %>%
  filter(row_number() == 1) %>%
  dplyr::select(educat, age_grp_educat_men, urban_lab, controlled_diab_indiv, educatnames, urban, female_lab)

controlled_diab_women_wealth_heat <- ggplot(data = controlled_diab_women_heatdat, aes(x = educatnames, y = age_grp_educat_men)) +
  geom_tile(aes(fill = controlled_diab_indiv)) +
  geom_text(aes(label = sprintf("%1.1f", controlled_diab_indiv)), size = 5) +
  scale_fill_distiller(palette = "RdYlGn", direction = 1) +
  theme_classic() +
  labs(x = "Education", y = "Age Group, y", fill = "") +
  heatmap_theme +
  coord_fixed(5 / 4)
controlled_diab_women_wealth_heat

dev.copy(pdf, 'educat controlled 3-25 until 54.pdf')
dev.off()


```



```{r logistic regressions no clustering but interaction educat for figure}

#Logistic regression


###Regression


####Regressions without clustering but with interaction educat for figure


# Unweighted, unclustered logistic models with education interactions
# (educat:age_grpOR, educat:urban, age_grpOR:urban) and d_id as a covariate.
# Each fit is saved, and its coefficient table (as returned by summary()) is
# written to CSV.
# NOTE(review): the CSVs are named "OR" but contain the summary coefficient
# table, not exponentiated odds ratios.

# Outcome: diabetes, full analysis sample.
multiv_feglm <- glm(
  formula = ex_diab_broad_ind ~ sex + age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    educat:age_grpOR + educat:urban + age_grpOR:urban,
  data = dhs_nomiss,
  family = binomial(link = "logit")
)
#exp(cbind(OR = coef(multiv_feglm), confint(multiv_feglm)))[2:20,]

save(multiv_feglm, file = "glmdiab_03_22")
results_df <- summary(multiv_feglm)$coefficients
write.csv(results_df, "resultsORglmdiab_03_22.csv")

# Outcome: treated, among diabetics.
multiv_feglmtreated <- glm(
  formula = diabetic_treated_dbl ~ sex + age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    educat:age_grpOR + educat:urban + age_grpOR:urban,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

save(multiv_feglmtreated, file = "glm.treated_2018_03_22")
results_treated <- summary(multiv_feglmtreated)$coefficients
write.csv(results_treated, "resultsORglm_treated_2018_03_22.csv")

# Outcome: aware, among diabetics.
multiv_feglmaware <- glm(
  formula = diabetic_aware_dbl ~ sex + age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    educat:age_grpOR + educat:urban + age_grpOR:urban,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

save(multiv_feglmaware, file = "glm.aware_2018_03_22")
results_aware <- summary(multiv_feglmaware)$coefficients
write.csv(results_aware, "resultsORglm_aware_2018_03_22.csv")

# Outcome: controlled, among diabetics.
multiv_feglmcontrolled <- glm(
  formula = diabetic_controlled_dbl ~ sex + age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    educat:age_grpOR + educat:urban + age_grpOR:urban,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

save(multiv_feglmcontrolled, file = "glm.controlled_2018_03_22")
results_controlled <- summary(multiv_feglmcontrolled)$coefficients
write.csv(results_controlled, "resultsORglm_controlled_2018_03_22.csv")

```

```{r logistic regressions no clustering but interaction wealth for figure}

###Regression without clustering but with interaction wealth for figure NO weights


# UNWEIGHTED, unclustered logistic models among diabetics with wealth
# interactions (wealth_quintile_rurb:age_grpOR, wealth_quintile_rurb:urban,
# age_grpOR:urban). These models do not include `sex`.
#
# The coefficient tables are written to "..._figure_noweights_..." files.
# They used to go to the "..._figure_weights_..." names, which the weighted
# chunk that follows also writes, so the unweighted results were mislabelled
# and then silently overwritten.
# NOTE(review): the CSVs are named "OR" but contain the summary coefficient
# table, not exponentiated odds ratios.

multiv_feglmtreated <- glm(
  formula = diabetic_treated_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

#save(file="glm.treated_2018_04_27",multiv_feglmtreated)
results_treated <- summary(multiv_feglmtreated)$coefficients
write.csv(results_treated, "resultsORglm_figure_noweights_treated_2018_04_27.csv")

multiv_feglmaware <- glm(
  formula = diabetic_aware_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

#save(file="glm.aware_2018_04_27",multiv_feglmaware)
results_aware <- summary(multiv_feglmaware)$coefficients
write.csv(results_aware, "resultsORglm_figure_noweights_aware_2018_04_27.csv")

multiv_feglmcontrolled <- glm(
  formula = diabetic_controlled_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

#save(file="glm.controlled_2018_04_27",multiv_feglmcontrolled)
results_controlled <- summary(multiv_feglmcontrolled)$coefficients
write.csv(results_controlled, "resultsORglm_figure_noweights_controlled_2018_04_27.csv")


```

```{r logistic regression no clusterin interaction wealth WEIGHTS}

### Regression without clustering but with wealth interactions for the
### figure -- WITH WEIGHTS.
#
# Same three models (treated / aware / controlled) on
# dhs_nomiss_diabetic_only with wealth x age, wealth x urban and
# age x urban interactions, this time passing p_wt_new as glm() weights.
# Coefficient tables are exported as CSV.

multiv_feglmtreated <- glm(
  formula = diabetic_treated_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

#save(file="glm.treated_2018_04_27",multiv_feglmtreated)
results_treated <- summary(multiv_feglmtreated)$coefficients
write.csv(
  results_treated,
  "resultsORglm_figure_weights_treated_2018_04_27.csv"
)

multiv_feglmaware <- glm(
  formula = diabetic_aware_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

#save(file="glm.aware_2018_04_27",multiv_feglmaware)
results_aware <- summary(multiv_feglmaware)$coefficients
write.csv(
  results_aware,
  "resultsORglm_figure_weights_aware_2018_04_27.csv"
)

multiv_feglmcontrolled <- glm(
  formula = diabetic_controlled_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

#save(file="glm.controlled_2018_04_27",multiv_feglmcontrolled)
results_controlled <- summary(multiv_feglmcontrolled)$coefficients
write.csv(
  results_controlled,
  "resultsORglm_figure_weights_controlled_2018_04_27.csv"
)

```

```{r regression no clustering interaction wealth WOMEN FOR REG FIG}

### Regression without clustering but with wealth interactions for the
### figure -- WOMEN only.
#
# Unweighted logistic models (treated / aware / controlled) fitted on
# dhs_nomiss_diabetic_only_women; sex is not a covariate here. Coefficient
# tables are exported as CSV.

multiv_feglmtreated <- glm(
  formula = diabetic_treated_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  data = dhs_nomiss_diabetic_only_women,
  family = binomial(link = "logit")
)

#save(file="glm.treated_2018_04_27",multiv_feglmtreated)
results_treated <- summary(multiv_feglmtreated)$coefficients
write.csv(
  results_treated,
  "resultsORglm_figure_persexwomen_treated_2018_04_27.csv"
)

multiv_feglmaware <- glm(
  formula = diabetic_aware_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  data = dhs_nomiss_diabetic_only_women,
  family = binomial(link = "logit")
)

#save(file="glm.aware_2018_04_27",multiv_feglmaware)
results_aware <- summary(multiv_feglmaware)$coefficients
write.csv(
  results_aware,
  "resultsORglm_figure_persexwomen_aware_2018_04_27.csv"
)

multiv_feglmcontrolled <- glm(
  formula = diabetic_controlled_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  data = dhs_nomiss_diabetic_only_women,
  family = binomial(link = "logit")
)

#save(file="glm.controlled_2018_04_27",multiv_feglmcontrolled)
results_controlled <- summary(multiv_feglmcontrolled)$coefficients
write.csv(
  results_controlled,
  "resultsORglm_figure_persexwomen_controlled_2018_04_27.csv"
)

```

```{r regression no clustering interaction wealth MEN FOR REG FIG}

# Regression figure, wealth interactions -- MEN only.
#
# Unweighted logistic models (treated / aware / controlled) fitted on
# dhs_nomiss_diabetic_only_men; sex is not a covariate here. Coefficient
# tables are exported as CSV.

multiv_feglmtreated <- glm(
  formula = diabetic_treated_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  data = dhs_nomiss_diabetic_only_men,
  family = binomial(link = "logit")
)

#save(file="glm.treated_2018_04_27",multiv_feglmtreated)
results_treated <- summary(multiv_feglmtreated)$coefficients
write.csv(
  results_treated,
  "resultsORglm_figure_persexmen_treated_2018_04_27.csv"
)

multiv_feglmaware <- glm(
  formula = diabetic_aware_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  data = dhs_nomiss_diabetic_only_men,
  family = binomial(link = "logit")
)

#save(file="glm.aware_2018_04_27",multiv_feglmaware)
results_aware <- summary(multiv_feglmaware)$coefficients
write.csv(
  results_aware,
  "resultsORglm_figure_persexmen_aware_2018_04_27.csv"
)

multiv_feglmcontrolled <- glm(
  formula = diabetic_controlled_dbl ~ age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id +
    wealth_quintile_rurb:age_grpOR + wealth_quintile_rurb:urban +
    age_grpOR:urban,
  data = dhs_nomiss_diabetic_only_men,
  family = binomial(link = "logit")
)

#save(file="glm.controlled_2018_04_27",multiv_feglmcontrolled)
results_controlled <- summary(multiv_feglmcontrolled)$coefficients
write.csv(
  results_controlled,
  "resultsORglm_figure_persexmen_controlled_2018_04_27.csv"
)

```

```{r regression with clustering}

#### Regressions with clustering
#
# Main-effects logistic models fitted with glm.cluster(), clustering the
# standard errors on "psu". For each outcome the fitted object is saved
# and whatever summary() returns for it is written to CSV.

# Disabled: the same model for ex_diab_broad_ind on dhs_nomiss.
#multiv_feclust <- glm.cluster(formula = ex_diab_broad_ind ~ sex + age_grpOR + married + wealth_quintile_rurb + educat + urban + d_id, cluster="psu", data=dhs_nomiss, family=binomial(link="logit"))
#exp(cbind(OR = coef(multiv_feclust), confint(multiv_feclust)))[2:656,] 

#save(file="glm.cluster2018_03_22",multiv_feclust)
#results_clust <-summary(multiv_feclust)
#write.csv(results_clust, "resultsglmclust2018_03_22.csv")

# Outcome: aware
multiv_feclustaware <- glm.cluster(
  formula = diabetic_aware_dbl ~ sex + age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)
#exp(cbind(OR = coef(multiv_feclust), confint(multiv_feclust)))[2:656,] 

save(multiv_feclustaware, file = "glm.clusteraware2018_03_22")
results_clustaware <- summary(multiv_feclustaware)
write.csv(results_clustaware, "resultsglmclustaware2018_03_22.csv")

# Outcome: treated
multiv_feclusttreated <- glm.cluster(
  formula = diabetic_treated_dbl ~ sex + age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)
#exp(cbind(OR = coef(multiv_feclust), confint(multiv_feclust)))[2:656,] 

save(multiv_feclusttreated, file = "glm.clustertreated2018_03_22")
results_clusttreated <- summary(multiv_feclusttreated)
write.csv(results_clusttreated, "resultsglmclusttreated2018_03_22.csv")

# Outcome: controlled
multiv_feclustcontrolled <- glm.cluster(
  formula = diabetic_controlled_dbl ~ sex + age_grpOR + married +
    wealth_quintile_rurb + educat + urban + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)
#exp(cbind(OR = coef(multiv_feclust), confint(multiv_feclust)))[2:656,] 

save(multiv_feclustcontrolled, file = "glm.clustercontrolled2018_03_22")
results_clustcontrolled <- summary(multiv_feclustcontrolled)
write.csv(results_clustcontrolled, "resultsglmclustcontrolled2018_03_22.csv")

```


```{r contrasts}


# Contrast models: weighted logistic regressions of aware / treated /
# controlled, fitted separately on the rural and urban diabetic samples,
# with education and wealth quintile entered as ORDERED factors. With R's
# default contrast options, ordered factors are coded with polynomial
# contrasts. Each coefficient table is written to its own CSV.

# rcs() (restricted cubic spline) comes from rms. In the visible part of
# this file rms is only attached in a LATER chunk, so attach it here too;
# otherwise this chunk fails with "could not find function rcs" when the
# document is run top to bottom. Attaching it twice is harmless.
library(rms)

dhs_nomiss$educatnames <- droplevels(dhs_nomiss$educatnames)

# NOTE(review): the level labels (including the spelling "schoon") must
# match the labels stored in educatnames exactly; any value that does not
# match becomes NA. Confirm against the data before "correcting" the typo.
dhs_nomiss_diabetic_only_rural$educatnames_ordered <- ordered(
  dhs_nomiss_diabetic_only_rural$educatnames,
  levels = c(
    "Primary schoon finished/unfinished",
    "Secondary school unfinished",
    "Secondary school or above"
  )
)

dhs_nomiss_diabetic_only_urban$educatnames_ordered <- ordered(
  dhs_nomiss_diabetic_only_urban$educatnames,
  levels = c(
    "Primary schoon finished/unfinished",
    "Secondary school unfinished",
    "Secondary school or above"
  )
)

dhs_nomiss_diabetic_only_rural$wealth_quintile_rurb_ordered <- ordered(
  dhs_nomiss_diabetic_only_rural$wealth_quintile_rurb,
  levels = c(1, 2, 3, 4, 5)
)

dhs_nomiss_diabetic_only_urban$wealth_quintile_rurb_ordered <- ordered(
  dhs_nomiss_diabetic_only_urban$wealth_quintile_rurb,
  levels = c(1, 2, 3, 4, 5)
)

# ---- Aware ----
aware_rural_contrast <- summary(glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) +
    wealth_quintile_rurb_ordered + educatnames_ordered +
    married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
))$coefficients

write.csv(aware_rural_contrast, "contrast_aware_rural.csv")

aware_urban_contrast <- summary(glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) +
    wealth_quintile_rurb_ordered + educatnames_ordered +
    married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
))$coefficients

write.csv(aware_urban_contrast, "contrast_aware_urban.csv")

# ---- Treated ----
treated_rural_contrast <- summary(glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) +
    wealth_quintile_rurb_ordered + educatnames_ordered +
    married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
))$coefficients

write.csv(treated_rural_contrast, "contrast_treated_rural.csv")

treated_urban_contrast <- summary(glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) +
    wealth_quintile_rurb_ordered + educatnames_ordered +
    married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
))$coefficients

write.csv(treated_urban_contrast, "contrast_treated_urban.csv")

# ---- Controlled ----
controlled_rural_contrast <- summary(glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) +
    wealth_quintile_rurb_ordered + educatnames_ordered +
    married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
))$coefficients

write.csv(controlled_rural_contrast, "contrast_controlled_rural.csv")

controlled_urban_contrast <- summary(glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) +
    wealth_quintile_rurb_ordered + educatnames_ordered +
    married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
))$coefficients

write.csv(controlled_urban_contrast, "contrast_controlled_urban.csv")





```


```{r regressions for new spline figure}


 library(rms)

#dhs_nomiss_diabetic_only_rural$age <- as.numeric(dhs_nomiss_diabetic_only_rural$age)
#dhs_nomiss_diabetic_only_urban$age <- as.numeric(dhs_nomiss_diabetic_only_urban$age)

### RURAL
# Weighted (p_wt_new) logistic models on the rural diabetic sample, with age
# entered through rcs(age, 5) plus wealth quintile, education, marital
# status, sex and d_id. One model per outcome.

# Outcome: aware
aware_spline_rural <- glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Outcome: treated
treated_spline_rural <- glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Outcome: controlled
controlled_spline_rural <- glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)







# Rural predicted probabilities for the spline figure.
#
# For each outcome: keep the rural rows with non-missing outcome and
# covariates, attach response-scale fitted values and their standard errors
# with augment(), store them as prob_<outcome> / se_diab, and drop the
# augment() helper columns.
#
# NOTE(review): the NA filter checks age_grpOR, wealth_quintile_rurb_lab,
# educat and urban, while the models use age, wealth_quintile_rurb,
# educatnames and p_wt_new. augment() presumably needs the filtered rows to
# line up with the rows used in the fit -- confirm they do.
# NOTE(review): the `se.fit` argument and the `.se.fit` / `.sigma` /
# `.cooksd` columns depend on the installed broom version -- confirm.

# ---- aware ----
dhs_nomiss_diabetic_only_aware_regress_rural <-
  dhs_nomiss_diabetic_only_rural %>%
  filter(
    !is.na(diabetic_aware_dbl),
    !is.na(sex),
    !is.na(age_grpOR),
    !is.na(married),
    !is.na(wealth_quintile_rurb_lab),
    !is.na(educat),
    !is.na(urban),
    !is.na(d_id)
  )

dhs_nomiss_diabetic_only_aware_regress_rural <- augment(
  aware_spline_rural,
  dhs_nomiss_diabetic_only_aware_regress_rural,
  type.predict = "response",
  se.fit = TRUE
) %>%
  mutate(
    prob_aware = .fitted,
    se_diab = .se.fit
  ) %>%
  dplyr::select(
    -.resid, -.hat, -.sigma, -.cooksd, -.std.resid, -.fitted, -.se.fit
  )

# ---- treated ----
dhs_nomiss_diabetic_only_treated_regress_rural <-
  dhs_nomiss_diabetic_only_rural %>%
  filter(
    !is.na(diabetic_treated_dbl),
    !is.na(sex),
    !is.na(age_grpOR),
    !is.na(married),
    !is.na(wealth_quintile_rurb_lab),
    !is.na(educat),
    !is.na(urban),
    !is.na(d_id)
  )

dhs_nomiss_diabetic_only_treated_regress_rural <- augment(
  treated_spline_rural,
  dhs_nomiss_diabetic_only_treated_regress_rural,
  type.predict = "response",
  se.fit = TRUE
) %>%
  mutate(
    prob_treated = .fitted,
    se_diab = .se.fit
  ) %>%
  dplyr::select(
    -.resid, -.hat, -.sigma, -.cooksd, -.std.resid, -.fitted, -.se.fit
  )

# ---- controlled ----
dhs_nomiss_diabetic_only_controlled_regress_rural <-
  dhs_nomiss_diabetic_only_rural %>%
  filter(
    !is.na(diabetic_controlled_dbl),
    !is.na(sex),
    !is.na(age_grpOR),
    !is.na(married),
    !is.na(wealth_quintile_rurb_lab),
    !is.na(educat),
    !is.na(urban),
    !is.na(d_id)
  )

dhs_nomiss_diabetic_only_controlled_regress_rural <- augment(
  controlled_spline_rural,
  dhs_nomiss_diabetic_only_controlled_regress_rural,
  type.predict = "response",
  se.fit = TRUE
) %>%
  mutate(
    prob_controlled = .fitted,
    se_diab = .se.fit
  ) %>%
  dplyr::select(
    -.resid, -.hat, -.sigma, -.cooksd, -.std.resid, -.fitted, -.se.fit
  )




###urban


# Weighted (p_wt_new) logistic models on the urban diabetic sample, with age
# entered through rcs(age, 5) plus wealth quintile, education, marital
# status, sex and d_id. One model per outcome.

# Outcome: aware
aware_spline_urban <- glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Outcome: treated
treated_spline_urban <- glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Outcome: controlled
controlled_spline_urban <- glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# If interaction terms are wanted, append this to the formulas above:
#   + wealth_quintile_rurb:age + wealth_quintile_rurb:urban + age:urban


# Urban predicted probabilities for the spline figure.
#
# For each outcome: keep the urban rows with non-missing outcome and
# covariates, attach response-scale fitted values and their standard errors
# with augment(), store them as prob_<outcome> / se_diab, and drop the
# augment() helper columns.
#
# NOTE(review): the NA filter checks age_grpOR, wealth_quintile_rurb_lab,
# educat and urban, while the models use age, wealth_quintile_rurb,
# educatnames and p_wt_new. augment() presumably needs the filtered rows to
# line up with the rows used in the fit -- confirm they do.
# NOTE(review): the `se.fit` argument and the `.se.fit` / `.sigma` /
# `.cooksd` columns depend on the installed broom version -- confirm.

# ---- aware ----
dhs_nomiss_diabetic_only_aware_regress_urban <-
  dhs_nomiss_diabetic_only_urban %>%
  filter(
    !is.na(diabetic_aware_dbl),
    !is.na(sex),
    !is.na(age_grpOR),
    !is.na(married),
    !is.na(wealth_quintile_rurb_lab),
    !is.na(educat),
    !is.na(urban),
    !is.na(d_id)
  )

dhs_nomiss_diabetic_only_aware_regress_urban <- augment(
  aware_spline_urban,
  dhs_nomiss_diabetic_only_aware_regress_urban,
  type.predict = "response",
  se.fit = TRUE
) %>%
  mutate(
    prob_aware = .fitted,
    se_diab = .se.fit
  ) %>%
  dplyr::select(
    -.resid, -.hat, -.sigma, -.cooksd, -.std.resid, -.fitted, -.se.fit
  )

# ---- treated ----
dhs_nomiss_diabetic_only_treated_regress_urban <-
  dhs_nomiss_diabetic_only_urban %>%
  filter(
    !is.na(diabetic_treated_dbl),
    !is.na(sex),
    !is.na(age_grpOR),
    !is.na(married),
    !is.na(wealth_quintile_rurb_lab),
    !is.na(educat),
    !is.na(urban),
    !is.na(d_id)
  )

dhs_nomiss_diabetic_only_treated_regress_urban <- augment(
  treated_spline_urban,
  dhs_nomiss_diabetic_only_treated_regress_urban,
  type.predict = "response",
  se.fit = TRUE
) %>%
  mutate(
    prob_treated = .fitted,
    se_diab = .se.fit
  ) %>%
  dplyr::select(
    -.resid, -.hat, -.sigma, -.cooksd, -.std.resid, -.fitted, -.se.fit
  )

# ---- controlled ----
dhs_nomiss_diabetic_only_controlled_regress_urban <-
  dhs_nomiss_diabetic_only_urban %>%
  filter(
    !is.na(diabetic_controlled_dbl),
    !is.na(sex),
    !is.na(age_grpOR),
    !is.na(married),
    !is.na(wealth_quintile_rurb_lab),
    !is.na(educat),
    !is.na(urban),
    !is.na(d_id)
  )

dhs_nomiss_diabetic_only_controlled_regress_urban <- augment(
  controlled_spline_urban,
  dhs_nomiss_diabetic_only_controlled_regress_urban,
  type.predict = "response",
  se.fit = TRUE
) %>%
  mutate(
    prob_controlled = .fitted,
    se_diab = .se.fit
  ) %>%
  dplyr::select(
    -.resid, -.hat, -.sigma, -.cooksd, -.std.resid, -.fitted, -.se.fit
  )



```

```{r new spline figure age}



## RURAL -- plotting dataset
#
# For each outcome, average the predicted probability and its standard
# error within each age value, add +/- 1.96 * SE bounds, and export the
# per-age table. The three tables are then joined on age, the
# probabilities are rescaled to percentages, and a per-age count column is
# attached for the background bars.

# ---- aware ----
pmeans_diab <- aggregate(
  prob_aware ~ age,
  data = dhs_nomiss_diabetic_only_aware_regress_rural,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age,
  data = dhs_nomiss_diabetic_only_aware_regress_rural,
  FUN = mean
)
figawarerural.diab <- pmeans_diab %>%
  left_join(p_standerr_diab, by = "age") %>%
  mutate(
    lowerci = prob_aware - (1.96 * se_diab),
    upperci = prob_aware + (1.96 * se_diab)
  )

write.csv(
  figawarerural.diab,
  "regressionfigure_aware_wealth diab SIMPLErural.csv"
)

# ---- treated ----
pmeans_diab <- aggregate(
  prob_treated ~ age,
  data = dhs_nomiss_diabetic_only_treated_regress_rural,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age,
  data = dhs_nomiss_diabetic_only_treated_regress_rural,
  FUN = mean
)
figtreatedrural.diab <- pmeans_diab %>%
  left_join(p_standerr_diab, by = "age") %>%
  mutate(
    lowerci = prob_treated - (1.96 * se_diab),
    upperci = prob_treated + (1.96 * se_diab)
  )

write.csv(
  figtreatedrural.diab,
  "regressionfigure_treated_wealth diab SIMPLErural.csv"
)

# ---- controlled ----
pmeans_diab <- aggregate(
  prob_controlled ~ age,
  data = dhs_nomiss_diabetic_only_controlled_regress_rural,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age,
  data = dhs_nomiss_diabetic_only_controlled_regress_rural,
  FUN = mean
)
figcontrolledrural.diab <- pmeans_diab %>%
  left_join(p_standerr_diab, by = "age") %>%
  mutate(
    lowerci = prob_controlled - (1.96 * se_diab),
    upperci = prob_controlled + (1.96 * se_diab)
  )

write.csv(
  figcontrolledrural.diab,
  "regressionfigure_controlled_wealth diab SIMPLErural.csv"
)

# ---- combine the three outcomes, one row per age ----
figawarefinalrural.diab <- figawarerural.diab %>%
  left_join(figtreatedrural.diab, by = "age") %>%
  left_join(figcontrolledrural.diab, by = "age") %>%
  mutate(
    prob_aware = prob_aware * 100,
    prob_treated = prob_treated * 100,
    prob_controlled = prob_controlled * 100
  )

# Hard-coded counts, attached by position: the vector has 35 entries and
# must line up with the rows of the joined table.
# NOTE(review): presumably one count per age 15-49 in the rural sample --
# confirm against the data whenever the sample changes.
count <- c(
  112, 111, 104, 135, 104, 140, 109, 124, 143, 149,
  237, 168, 167, 213, 176, 383, 193, 278, 224, 290,
  503, 343, 343, 427, 356, 767, 339, 540, 432, 428,
  977, 550, 517, 753, 551
)

figawarefinalrural.diab <- cbind(figawarefinalrural.diab, count)

## How the count estimates can be obtained:

#dhs_nomiss_diabetic_only$age_factor <- as.factor(dhs_nomiss_diabetic_only$age)

#age <-summary(dhs_nomiss_diabetic_only$age_factor)

                   

### Make figure (rural)
#
# Grey bars: per-age count divided by 20. Lines: loess smooths of the
# per-age percentages -- aware (red), treated (orange), controlled (purple).
#
# NOTE(review): the x scale is limited to 15-49 while the bars have width 1,
# so the bars at the two edge ages reach beyond the limits and may be
# dropped by ggplot2 ("Removed rows containing missing values") -- verify
# on the rendered figure.
# NOTE(review): colours are set as constants, so scale_color_brewer() and
# labs(fill = "") presumably have no visible effect.
stateawarefig <- figawarefinalrural.diab %>%
  ggplot() +
  geom_bar(
    aes(x = age, y = count / 20),
    stat = "identity",
    alpha = 0.7,
    width = 1
  ) +
  # geom_histogram(aes(x=age, y=(count)), binwidth=0.1, alpha=0.7) +
  # geom_point(aes(y=prob_screened, x=age), size=2.5, color="blue") +
  geom_smooth(
    aes(y = prob_aware, x = age),
    method = "loess", se = FALSE, color = "red"
  ) +
  # geom_point(aes(y=prob_aware, x=age), size=2.5, color="red") +
  geom_smooth(
    aes(y = prob_treated, x = age),
    method = "loess", se = FALSE, color = "orange"
  ) +
  # geom_point(aes(y=prob_treated, x=age), size=2.5, color="orange") +
  geom_smooth(
    aes(y = prob_controlled, x = age),
    method = "loess", se = FALSE, color = "purple"
  ) +
  # geom_point(aes(y=prob_controlled, x=age), size=2.5, color="purple") +
  theme_classic() +
  labs(
    x = "age (years)",
    y = " Percentage",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    #legend.position="bottom",
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_color_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(20, 40, 60, 80, 100), limits = c(0, 100)) +
  scale_x_continuous(breaks = c(15, 20, 30, 40, 49), limits = c(15, 49)) +
  coord_fixed(34 / 100, expand = FALSE)
stateawarefig






## URBAN -- plotting dataset
#
# For each outcome, average the predicted probability and its standard
# error within each age value, add +/- 1.96 * SE bounds, and export the
# per-age table. The three tables are then joined on age, the
# probabilities are rescaled to percentages, and a per-age count column is
# attached for the background bars.

# ---- aware ----
pmeans_diab <- aggregate(
  prob_aware ~ age,
  data = dhs_nomiss_diabetic_only_aware_regress_urban,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age,
  data = dhs_nomiss_diabetic_only_aware_regress_urban,
  FUN = mean
)
figawareurban.diab <- pmeans_diab %>%
  left_join(p_standerr_diab, by = "age") %>%
  mutate(
    lowerci = prob_aware - (1.96 * se_diab),
    upperci = prob_aware + (1.96 * se_diab)
  )

write.csv(
  figawareurban.diab,
  "regressionfigure_aware_wealth diab SIMPLEurban.csv"
)

# ---- treated ----
pmeans_diab <- aggregate(
  prob_treated ~ age,
  data = dhs_nomiss_diabetic_only_treated_regress_urban,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age,
  data = dhs_nomiss_diabetic_only_treated_regress_urban,
  FUN = mean
)
figtreatedurban.diab <- pmeans_diab %>%
  left_join(p_standerr_diab, by = "age") %>%
  mutate(
    lowerci = prob_treated - (1.96 * se_diab),
    upperci = prob_treated + (1.96 * se_diab)
  )

write.csv(
  figtreatedurban.diab,
  "regressionfigure_treated_wealth diab SIMPLEurban.csv"
)

# ---- controlled ----
pmeans_diab <- aggregate(
  prob_controlled ~ age,
  data = dhs_nomiss_diabetic_only_controlled_regress_urban,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age,
  data = dhs_nomiss_diabetic_only_controlled_regress_urban,
  FUN = mean
)
figcontrolledurban.diab <- pmeans_diab %>%
  left_join(p_standerr_diab, by = "age") %>%
  mutate(
    lowerci = prob_controlled - (1.96 * se_diab),
    upperci = prob_controlled + (1.96 * se_diab)
  )

write.csv(
  figcontrolledurban.diab,
  "regressionfigure_controlled_wealth diab SIMPLEurban.csv"
)

# ---- combine the three outcomes, one row per age ----
figawarefinalurban.diab <- figawareurban.diab %>%
  left_join(figtreatedurban.diab, by = "age") %>%
  left_join(figcontrolledurban.diab, by = "age") %>%
  mutate(
    prob_aware = prob_aware * 100,
    prob_treated = prob_treated * 100,
    prob_controlled = prob_controlled * 100
  )

# Hard-coded counts, attached by position: the vector has 35 entries and
# must line up with the rows of the joined table.
# NOTE(review): presumably one count per age 15-49 in the urban sample --
# confirm against the data whenever the sample changes.
count <- c(
  32, 37, 33, 55, 37, 53, 43, 68, 62, 47,
  96, 76, 87, 125, 111, 169, 123, 195, 165, 191,
  313, 241, 211, 319, 279, 534, 303, 463, 378, 405,
  754, 453, 466, 629, 514
)

figawarefinalurban.diab <- cbind(figawarefinalurban.diab, count)

## How the count estimates can be obtained:

#dhs_nomiss_diabetic_only$age_factor <- as.factor(dhs_nomiss_diabetic_only$age)

#age <-summary(dhs_nomiss_diabetic_only$age_factor)

                   

### Make figure (urban)
#
# Grey bars: per-age count divided by 20. Lines: loess smooths of the
# per-age percentages -- aware (red), treated (orange), controlled (purple).
#
# NOTE(review): the x scale is limited to 15-49 while the bars have width 1,
# so the bars at the two edge ages reach beyond the limits and may be
# dropped by ggplot2 ("Removed rows containing missing values") -- verify
# on the rendered figure.
# NOTE(review): colours are set as constants, so scale_color_brewer() and
# labs(fill = "") presumably have no visible effect.
stateawarefig <- figawarefinalurban.diab %>%
  ggplot() +
  geom_bar(
    aes(x = age, y = count / 20),
    stat = "identity",
    alpha = 0.7,
    width = 1
  ) +
  # geom_histogram(aes(x=age, y=(count)), binwidth=0.1, alpha=0.7) +
  # geom_point(aes(y=prob_screened, x=age), size=2.5, color="blue") +
  geom_smooth(
    aes(y = prob_aware, x = age),
    method = "loess", se = FALSE, color = "red"
  ) +
  # geom_point(aes(y=prob_aware, x=age), size=2.5, color="red") +
  geom_smooth(
    aes(y = prob_treated, x = age),
    method = "loess", se = FALSE, color = "orange"
  ) +
  # geom_point(aes(y=prob_treated, x=age), size=2.5, color="orange") +
  geom_smooth(
    aes(y = prob_controlled, x = age),
    method = "loess", se = FALSE, color = "purple"
  ) +
  # geom_point(aes(y=prob_controlled, x=age), size=2.5, color="purple") +
  theme_classic() +
  labs(
    x = "age (years)",
    y = " Percentage",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    #legend.position="bottom",
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_color_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(20, 40, 60, 80, 100), limits = c(0, 100)) +
  scale_x_continuous(breaks = c(15, 20, 30, 40, 49), limits = c(15, 49)) +
  coord_fixed(34 / 100, expand = FALSE)
stateawarefig


 




  
  
  
  
  #  geom_ribbon(aes(ymin = 0, ymax = predict(loess(figawarefinal.diab$prob_screened ~ figawarefinal.diab$age,span=1)),x=age), alpha = 1, fill = "firebrick1") +
#    geom_ribbon(aes(ymin = 0, ymax = predict(loess(figawarefinal.diab$prob_aware ~ figawarefinal.diab$age,span=1)),x=age), alpha = 1, fill = "darkorange1") +
#    geom_ribbon(aes(ymin = 0, ymax = predict(loess(figawarefinal.diab$prob_treated ~ figawarefinal.diab$age,span=1)),x=age), alpha = 1, fill = "darkviolet") +
#    geom_ribbon(aes(ymin = 0, ymax = predict(loess(figawarefinal.diab$prob_controlled ~ figawarefinal.diab$age,span=1)),x=age), alpha = 1, fill = "gray23") +
#  geom_ribbon(aes(ymin = 0, ymax = predict(loess(figawarefinal.diab$twomorb ~ figawarefinal.diab$age,span=1)),x=$age), alpha = 1, fill = "firebrick3") +
#  geom_ribbon(aes(ymin = 0, ymax = predict(loess(figawarefinal.diab$threemorb ~ figawarefinal.diab$age,span=1)),x=age), alpha = 1, fill = "firebrick4") +
 # geom_ribbon(aes(ymin = 0, ymax = predict(loess(figawarefinal.diab$fourmorb ~ figawarefinal.diab$age,span=1)),x=age), alpha = 1, fill = "gray23") +
  
  
  
  
  

#  ggplot(aes(y=diab_screened_dbl, x=age)) +
#  geom_smooth(method='glm', formula= diab_screened_dbl ~ rcs(age,5) + rcs(asset_index,5) + educatnames + married +sex +urban + rcs(bmi,5) + tobacco_smoked + tobacco_smokeless + d_id + asset_index:age + asset_index:urban + age:urban, se= FALSE, color="black") +
#  geom_jitter(aes(y=diab_screened_dbl, x=age), size=2.5) +
#  theme_classic() + 
#  labs(x = "Screened, in %",
#       y = " Hypertension prevalence, in %",
 #      fill="") +
 # theme(axis.text=element_text(size=20),
#        axis.title=element_text(size=22, face="bold"),
#        legend.text=element_text(size=20),
#        legend.title = element_blank(),
        #legend.position="bottom",
#        axis.title.x = element_text(margin = margin(t = 20)),
#        axis.title.y = element_text(margin = margin(r = 20)),
#        strip.text.x = element_text(size=22, face="bold"),
#        strip.background = element_blank(),
#        panel.spacing = unit(2, "lines")) + 
#  scale_color_brewer(palette="Dark2") +
#  scale_y_continuous(breaks = c(0, 5, 10,15, 20, 25,30), limits=c(0, 30)) +
#  scale_x_continuous(breaks = c(20, 40, 60, 80,100), limits=c(0, 100)) +
#  coord_fixed(100/30, expand=F)
#stateawarefig




```



```{r regressions with WEIGHTS separately urban rural age as spline!!!}
# WITH WEIGHTS: non-clustered regressions, fitted separately for urban and
# rural samples.
library(clusterSEs)

# ---- Rural: awareness ----
# Weighted logistic model with age entered through rcs(age, 5).
multiv_feclustaware_rural <- glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Odds ratios with confint.default() intervals, exported as CSV.
#exp(cbind(OR = coef(multiv_feclustaware_rural), confint.default(multiv_feclustaware_rural))) 
aware_rural <- exp(cbind(
  OR = coef(multiv_feclustaware_rural),
  confint.default(multiv_feclustaware_rural)
))
write.csv(aware_rural, "OR aware ruralweights.csv")

# Coefficient table (estimate, SE, z, p), exported as CSV.
#results_clustawarerural <-summary(multiv_feclustaware_rural)
results_clustawareruralcoeff <-
  summary(multiv_feclustaware_rural)$coefficients
write.csv(
  results_clustawareruralcoeff,
  "resultsglmclustawarerural2018_04_26weights.csv"
)

# Read the OR table back in. This used to read from a hard-coded absolute
# path under "~/Documents/...", which only works on one machine and can
# silently pick up a stale file when the working directory is elsewhere.
# Reading the relative path written a few lines above guarantees the table
# is built from the model fitted in this run.
aware <- read.csv("OR aware ruralweights.csv")

# read.csv() turns the "2.5 %" / "97.5 %" headers into "X2.5.." / "X97.5..".
names(aware)[names(aware) == "X2.5.."] <- "lowci"
names(aware)[names(aware) == "X97.5.."] <- "uppci"

# Format as "OR (low-upp)" with two decimals and keep only the term name
# (X) and the formatted string (rr).
aware <- aware %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values from the coefficient table written above (same relative-path
# fix). The object keeps its historical name `results_treated` although it
# holds the awareness model here.
results_treated <-
  read.csv("resultsglmclustawarerural2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = c("X" = "X"))

# Filler rows for the table layout: a "1 (Reference)" row and a blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert the filler rows working from the bottom of the table upwards, so
# that earlier insertions do not shift the row numbers used afterwards.
# NOTE(review): InsertRow() is not defined in the visible part of this file
# (presumably DataCombine::InsertRow) -- confirm the package is attached.
#joint <- InsertRow(joint, NewRow = age, RowNum = 23)
#joint <- InsertRow(joint, NewRow = noref, RowNum = 23)
joint <- InsertRow(joint, NewRow = age, RowNum = 21)
joint <- InsertRow(joint, NewRow = noref, RowNum = 21)
joint <- InsertRow(joint, NewRow = age, RowNum = 13)
joint <- InsertRow(joint, NewRow = noref, RowNum = 13)
joint <- InsertRow(joint, NewRow = age, RowNum = 9)
joint <- InsertRow(joint, NewRow = noref, RowNum = 9)
joint <- InsertRow(joint, NewRow = age, RowNum = 2)
joint <- InsertRow(joint, NewRow = noref, RowNum = 2)

write.csv(joint, "OR aware rural weightsweights.csv")
 









# ---- Urban: awareness ----
# Weighted logistic model with age entered through rcs(age, 5).
multiv_feclustaware_urban <- glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Odds ratios with confint.default() intervals, exported as CSV.
#exp(cbind(OR = coef(multiv_feclustaware_urban), confint.default(multiv_feclustaware_urban))) 
aware_urban <- exp(cbind(
  OR = coef(multiv_feclustaware_urban),
  confint.default(multiv_feclustaware_urban)
))
write.csv(aware_urban, "OR aware urbanweights.csv")


#CI_multiv_feclustaware_urban <- cluster.bs.glm(multiv_feclustaware_urban, dhs_nomiss_diabetic_only_urban, ~d_id, report=T )
#write.csv(CI_multiv_feclustaware_urban, "CI urban awareweights.csv")


# Coefficient table (estimate, SE, z, p), exported as CSV.
#results_clustawareurban <-summary(multiv_feclustaware_urban)
results_clustawareurbancoeff <-
  summary(multiv_feclustaware_urban)$coefficients
write.csv(
  results_clustawareurbancoeff,
  "resultsglmclustawareurban2018_04_26weights.csv"
)

# Read the OR table back in. This used to read from a hard-coded absolute
# path under "~/Documents/...", which only works on one machine and can
# silently pick up a stale file when the working directory is elsewhere.
# Reading the relative path written a few lines above guarantees the table
# is built from the model fitted in this run.
aware <- read.csv("OR aware urbanweights.csv")

# read.csv() turns the "2.5 %" / "97.5 %" headers into "X2.5.." / "X97.5..".
names(aware)[names(aware) == "X2.5.."] <- "lowci"
names(aware)[names(aware) == "X97.5.."] <- "uppci"

# Format as "OR (low-upp)" with two decimals and keep only the term name
# (X) and the formatted string (rr).
aware <- aware %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values from the coefficient table written above (same relative-path
# fix). The object keeps its historical name `results_treated` although it
# holds the awareness model here.
results_treated <-
  read.csv("resultsglmclustawareurban2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = c("X" = "X"))

# Filler rows for the table layout: a "1 (Reference)" row and a blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")


#joint <- InsertRow(joint, NewRow = age, RowNum = 23)
#joint <- InsertRow(joint, NewRow = noref, RowNum = 23)
joint <- InsertRow(joint, NewRow = age, RowNum = 21)
joint <- InsertRow(joint, NewRow = noref, RowNum = 21)
joint <- InsertRow(joint, NewRow = age, RowNum = 13)
joint <- InsertRow(joint, NewRow = noref, RowNum = 13)
joint <- InsertRow(joint, NewRow = age, RowNum = 9)
joint <- InsertRow(joint, NewRow = noref, RowNum = 9)
joint <- InsertRow(joint, NewRow = age, RowNum = 2)
joint <- InsertRow(joint, NewRow = noref, RowNum = 2)



 write.csv(joint, "OR aware urban weightsweights.csv")
 






# Rural treated ----
# Weighted binomial (logit) glm of diabetic_treated_dbl, fitted on the rural data.
multiv_feclusttreated_rural <- glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Exponentiated coefficients with confint.default() limits, saved as CSV.
treated_rural <- exp(cbind(
  OR = coef(multiv_feclusttreated_rural),
  confint.default(multiv_feclusttreated_rural)
))
write.csv(treated_rural, "OR treated ruralweights.csv")

# Coefficient table of the fit, saved as CSV.
results_clusttreatedruralcoeff <- summary.glm(multiv_feclusttreated_rural)$coefficients
write.csv(results_clusttreatedruralcoeff, "resultsglmclusttreatedrural2018_04_26weights.csv")

# NOTE(review): the CSVs above are written relative to the working directory
# but read back from an absolute path; this only round-trips when the working
# directory is that folder -- confirm.
# Read the odds ratios back and build one "OR (low-high)" string per term,
# each number printed with two decimals.
treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR treated ruralweights.csv") %>%
  mutate(
    OR = sprintf("%.2f", round(OR, 2)),
    lowci = sprintf("%.2f", round(X2.5.., 2)),
    uppci = sprintf("%.2f", round(X97.5.., 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values as 3-decimal strings; values that print as "0.000" become "<0.001".
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclusttreatedrural2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# One row per model term: formatted odds ratio plus p-value.
joint <- left_join(treated, results_treated, by = "X")

# Filler rows for the output table: a "1 (Reference)" row and an all-blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- rep(" ", 3)

# At each position the reference row is inserted first, then the blank row at
# the same RowNum. Positions are handled from the bottom of the table upwards.
# (An additional insertion at RowNum = 23 was disabled in the original.)
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR treated rural weightsweights.csv")
 




# Urban treated ----
# Weighted binomial (logit) glm of diabetic_treated_dbl, fitted on the urban data.
multiv_feclusttreated_urban <- glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Exponentiated coefficients with confint.default() limits, saved as CSV.
treated_urban <- exp(cbind(
  OR = coef(multiv_feclusttreated_urban),
  confint.default(multiv_feclusttreated_urban)
))
write.csv(treated_urban, "OR treated urbanweights.csv")

# Coefficient table of the fit, saved as CSV.
results_clusttreatedurbancoeff <- summary.glm(multiv_feclusttreated_urban)$coefficients
write.csv(results_clusttreatedurbancoeff, "resultsglmclusttreatedurban2018_04_26weights.csv")

# NOTE(review): the CSVs above are written relative to the working directory
# but read back from an absolute path; this only round-trips when the working
# directory is that folder -- confirm.
# Read the odds ratios back and build one "OR (low-high)" string per term,
# each number printed with two decimals.
treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR treated urbanweights.csv") %>%
  mutate(
    OR = sprintf("%.2f", round(OR, 2)),
    lowci = sprintf("%.2f", round(X2.5.., 2)),
    uppci = sprintf("%.2f", round(X97.5.., 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values as 3-decimal strings; values that print as "0.000" become "<0.001".
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclusttreatedurban2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# One row per model term: formatted odds ratio plus p-value.
joint <- left_join(treated, results_treated, by = "X")

# Filler rows for the output table: a "1 (Reference)" row and an all-blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- rep(" ", 3)

# At each position the reference row is inserted first, then the blank row at
# the same RowNum. Positions are handled from the bottom of the table upwards.
# (An additional insertion at RowNum = 23 was disabled in the original.)
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR treated urban weightsweights.csv")
 





# Rural controlled ----
# Weighted binomial (logit) glm of diabetic_controlled_dbl, fitted on the rural data.
multiv_feclustcontrolled_rural <- glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Exponentiated coefficients with confint.default() limits, saved as CSV.
controlled_rural <- exp(cbind(
  OR = coef(multiv_feclustcontrolled_rural),
  confint.default(multiv_feclustcontrolled_rural)
))
write.csv(controlled_rural, "OR controlled ruralweights.csv")

# Coefficient table of the fit, saved as CSV.
results_clustcontrolledruralcoeff <- summary.glm(multiv_feclustcontrolled_rural)$coefficients
write.csv(results_clustcontrolledruralcoeff, "resultsglmclustcontrolledrural2018_04_26weights.csv")

# NOTE(review): the CSVs above are written relative to the working directory
# but read back from an absolute path; this only round-trips when the working
# directory is that folder -- confirm.
# Read the odds ratios back and build one "OR (low-high)" string per term,
# each number printed with two decimals.
controlled <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR controlled ruralweights.csv") %>%
  mutate(
    OR = sprintf("%.2f", round(OR, 2)),
    lowci = sprintf("%.2f", round(X2.5.., 2)),
    uppci = sprintf("%.2f", round(X97.5.., 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values as 3-decimal strings; values that print as "0.000" become "<0.001".
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclustcontrolledrural2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# One row per model term: formatted odds ratio plus p-value.
joint <- left_join(controlled, results_treated, by = "X")

# Filler rows for the output table: a "1 (Reference)" row and an all-blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- rep(" ", 3)

# At each position the reference row is inserted first, then the blank row at
# the same RowNum. Positions are handled from the bottom of the table upwards.
# (An additional insertion at RowNum = 23 was disabled in the original.)
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR controlled rural weightsweights.csv")
 








# Urban controlled ----
# Weighted binomial (logit) glm of diabetic_controlled_dbl, fitted on the urban data.
multiv_feclustcontrolled_urban <- glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Exponentiated coefficients with confint.default() limits, saved as CSV.
controlled_urban <- exp(cbind(
  OR = coef(multiv_feclustcontrolled_urban),
  confint.default(multiv_feclustcontrolled_urban)
))
write.csv(controlled_urban, "OR controlled urbanweights.csv")

# Coefficient table of the fit, saved as CSV.
results_clustcontrolledurbancoeff <- summary.glm(multiv_feclustcontrolled_urban)$coefficients
write.csv(results_clustcontrolledurbancoeff, "resultsglmclustcontrolledurban2018_04_26weights.csv")

# NOTE(review): the CSVs above are written relative to the working directory
# but read back from an absolute path; this only round-trips when the working
# directory is that folder -- confirm.
# Read the odds ratios back and build one "OR (low-high)" string per term,
# each number printed with two decimals.
controlled <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR controlled urbanweights.csv") %>%
  mutate(
    OR = sprintf("%.2f", round(OR, 2)),
    lowci = sprintf("%.2f", round(X2.5.., 2)),
    uppci = sprintf("%.2f", round(X97.5.., 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values as 3-decimal strings; values that print as "0.000" become "<0.001".
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclustcontrolledurban2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# One row per model term: formatted odds ratio plus p-value.
joint <- left_join(controlled, results_treated, by = "X")

# Filler rows for the output table: a "1 (Reference)" row and an all-blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- rep(" ", 3)

# At each position the reference row is inserted first, then the blank row at
# the same RowNum. Positions are handled from the bottom of the table upwards.
# (An additional insertion at RowNum = 23 was disabled in the original.)
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR controlled urban weightsweights.csv")
 






```




```{r regressions with WEIGHTS separately urban rural age as spline try with survey design NOT USED}
#WITH WEIGHTS not-clustered regressions for urban and rural separately#
library(clusterSEs)
library(survey)


# Drop respondents with a missing psu before building the survey designs.
# NOTE(review): the filter is on `psu` while the design below clusters on
# `psuid` -- confirm which column is the intended primary sampling unit.
dhs_nomiss_diabetic_only_rural_noNAinpsu <- dplyr::filter(dhs_nomiss_diabetic_only_rural, !is.na(psu))

dhs_nomiss_diabetic_only_urban_noNAinpsu <- dplyr::filter(dhs_nomiss_diabetic_only_urban, !is.na(psu))

# Survey design for the rural sample.
# Fixes relative to the previous version:
#  * the stratification argument of as_survey_design() is `strata`; the former
#    `stratum = stratum` was swallowed by `...` and the strata were ignored.
#  * `variables` is a column selection, so it lists the raw `age` column; the
#    spline term rcs(age, 5) belongs in the model formula only.
# NOTE(review): if PSU identifiers repeat across strata, add `nest = TRUE`.
svy_aware <- dhs_nomiss_diabetic_only_rural_noNAinpsu %>%
  as_survey_design(
    strata = stratum,
    ids = c(psuid, hh_id),
    weights = p_wt_new,
    variables = c(
      diabetic_aware_dbl, diabetic_treated_dbl, diabetic_controlled_dbl,
      age, wealth_quintile_rurb,
      educatnames, married, sex, d_id
    )
  )

# Design-based logistic regression for awareness in the rural sample.
# Fixes: `survey::svyglm` (the single colon was parsed as the sequence
# operator) and `design = svy_aware` (`svy_controlled` is never created here).
multiv_feclustaware_rural <- survey::svyglm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  design = svy_aware,
  family = binomial(link = "logit")
)

# Exponentiated coefficients with confint.default() limits, saved as CSV.
aware_rural <- exp(cbind(
  OR = coef(multiv_feclustaware_rural),
  confint.default(multiv_feclustaware_rural)
))
write.csv(aware_rural, "OR aware ruralweights.csv")

# Coefficient table of the fit, saved as CSV.
# NOTE(review): summary.glm() is called directly on the survey fit, so the
# survey-specific summary method is bypassed -- confirm the p-values are the
# intended ones.
results_clustawareruralcoeff <- summary.glm(multiv_feclustaware_rural)$coefficients
write.csv(results_clustawareruralcoeff, "resultsglmclustawarerural2018_04_26weights.csv")

# NOTE(review): the CSVs above are written relative to the working directory
# but read back from an absolute path; this only round-trips when the working
# directory is that folder -- confirm.
# Read the odds ratios back and build one "OR (low-high)" string per term,
# each number printed with two decimals.
aware <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR aware ruralweights.csv") %>%
  mutate(
    OR = sprintf("%.2f", round(OR, 2)),
    lowci = sprintf("%.2f", round(X2.5.., 2)),
    uppci = sprintf("%.2f", round(X97.5.., 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values as 3-decimal strings; values that print as "0.000" become "<0.001".
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclustawarerural2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# One row per model term: formatted odds ratio plus p-value.
joint <- left_join(aware, results_treated, by = "X")

# Filler rows for the output table: a "1 (Reference)" row and an all-blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- rep(" ", 3)

# At each position the reference row is inserted first, then the blank row at
# the same RowNum. Positions are handled from the bottom of the table upwards.
# (An additional insertion at RowNum = 23 was disabled in the original.)
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR aware rural weightsweights.csv")
 

# Rural aware ----
# Weighted binomial (logit) glm of diabetic_aware_dbl, fitted on the rural data.
multiv_feclustaware_rural <- glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Exponentiated coefficients with confint.default() limits, saved as CSV.
aware_rural <- exp(cbind(
  OR = coef(multiv_feclustaware_rural),
  confint.default(multiv_feclustaware_rural)
))
write.csv(aware_rural, "OR aware ruralweights.csv")

# Coefficient table of the fit, saved as CSV.
results_clustawareruralcoeff <- summary.glm(multiv_feclustaware_rural)$coefficients
write.csv(results_clustawareruralcoeff, "resultsglmclustawarerural2018_04_26weights.csv")

# NOTE(review): the CSVs above are written relative to the working directory
# but read back from an absolute path; this only round-trips when the working
# directory is that folder -- confirm.
# Read the odds ratios back and build one "OR (low-high)" string per term,
# each number printed with two decimals.
aware <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR aware ruralweights.csv") %>%
  mutate(
    OR = sprintf("%.2f", round(OR, 2)),
    lowci = sprintf("%.2f", round(X2.5.., 2)),
    uppci = sprintf("%.2f", round(X97.5.., 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values as 3-decimal strings; values that print as "0.000" become "<0.001".
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclustawarerural2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# One row per model term: formatted odds ratio plus p-value.
joint <- left_join(aware, results_treated, by = "X")

# Filler rows for the output table: a "1 (Reference)" row and an all-blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- rep(" ", 3)

# At each position the reference row is inserted first, then the blank row at
# the same RowNum. Positions are handled from the bottom of the table upwards.
# (An additional insertion at RowNum = 23 was disabled in the original.)
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR aware rural weightsweights.csv")
 









# Urban aware ----
# Weighted binomial (logit) glm of diabetic_aware_dbl, fitted on the urban data.
multiv_feclustaware_urban <- glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Exponentiated coefficients with confint.default() limits, saved as CSV.
aware_urban <- exp(cbind(
  OR = coef(multiv_feclustaware_urban),
  confint.default(multiv_feclustaware_urban)
))
write.csv(aware_urban, "OR aware urbanweights.csv")

# Disabled in the original (bootstrap-clustered intervals):
#CI_multiv_feclustaware_urban <- cluster.bs.glm(multiv_feclustaware_urban, dhs_nomiss_diabetic_only_urban, ~d_id, report=T )
#write.csv(CI_multiv_feclustaware_urban, "CI urban awareweights.csv")

# Coefficient table of the fit, saved as CSV.
results_clustawareurbancoeff <- summary.glm(multiv_feclustaware_urban)$coefficients
write.csv(results_clustawareurbancoeff, "resultsglmclustawareurban2018_04_26weights.csv")

# NOTE(review): the CSVs above are written relative to the working directory
# but read back from an absolute path; this only round-trips when the working
# directory is that folder -- confirm.
# Read the odds ratios back and build one "OR (low-high)" string per term,
# each number printed with two decimals.
aware <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR aware urbanweights.csv") %>%
  mutate(
    OR = sprintf("%.2f", round(OR, 2)),
    lowci = sprintf("%.2f", round(X2.5.., 2)),
    uppci = sprintf("%.2f", round(X97.5.., 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values as 3-decimal strings; values that print as "0.000" become "<0.001".
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclustawareurban2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# One row per model term: formatted odds ratio plus p-value.
joint <- left_join(aware, results_treated, by = "X")

# Filler rows for the output table: a "1 (Reference)" row and an all-blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- rep(" ", 3)

# At each position the reference row is inserted first, then the blank row at
# the same RowNum. Positions are handled from the bottom of the table upwards.
# (An additional insertion at RowNum = 23 was disabled in the original.)
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR aware urban weightsweights.csv")
 






# Rural treated ----
# Weighted binomial (logit) glm of diabetic_treated_dbl, fitted on the rural data.
multiv_feclusttreated_rural <- glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Exponentiated coefficients with confint.default() limits, saved as CSV.
treated_rural <- exp(cbind(
  OR = coef(multiv_feclusttreated_rural),
  confint.default(multiv_feclusttreated_rural)
))
write.csv(treated_rural, "OR treated ruralweights.csv")

# Coefficient table of the fit, saved as CSV.
results_clusttreatedruralcoeff <- summary.glm(multiv_feclusttreated_rural)$coefficients
write.csv(results_clusttreatedruralcoeff, "resultsglmclusttreatedrural2018_04_26weights.csv")

# NOTE(review): the CSVs above are written relative to the working directory
# but read back from an absolute path; this only round-trips when the working
# directory is that folder -- confirm.
# Read the odds ratios back and build one "OR (low-high)" string per term,
# each number printed with two decimals.
treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR treated ruralweights.csv") %>%
  mutate(
    OR = sprintf("%.2f", round(OR, 2)),
    lowci = sprintf("%.2f", round(X2.5.., 2)),
    uppci = sprintf("%.2f", round(X97.5.., 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values as 3-decimal strings; values that print as "0.000" become "<0.001".
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclusttreatedrural2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# One row per model term: formatted odds ratio plus p-value.
joint <- left_join(treated, results_treated, by = "X")

# Filler rows for the output table: a "1 (Reference)" row and an all-blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- rep(" ", 3)

# At each position the reference row is inserted first, then the blank row at
# the same RowNum. Positions are handled from the bottom of the table upwards.
# (An additional insertion at RowNum = 23 was disabled in the original.)
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR treated rural weightsweights.csv")
 




# Urban treated ----
# Weighted binomial (logit) glm of diabetic_treated_dbl, fitted on the urban data.
multiv_feclusttreated_urban <- glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Exponentiated coefficients with confint.default() limits, saved as CSV.
treated_urban <- exp(cbind(
  OR = coef(multiv_feclusttreated_urban),
  confint.default(multiv_feclusttreated_urban)
))
write.csv(treated_urban, "OR treated urbanweights.csv")

# Coefficient table of the fit, saved as CSV.
results_clusttreatedurbancoeff <- summary.glm(multiv_feclusttreated_urban)$coefficients
write.csv(results_clusttreatedurbancoeff, "resultsglmclusttreatedurban2018_04_26weights.csv")

# NOTE(review): the CSVs above are written relative to the working directory
# but read back from an absolute path; this only round-trips when the working
# directory is that folder -- confirm.
# Read the odds ratios back and build one "OR (low-high)" string per term,
# each number printed with two decimals.
treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR treated urbanweights.csv") %>%
  mutate(
    OR = sprintf("%.2f", round(OR, 2)),
    lowci = sprintf("%.2f", round(X2.5.., 2)),
    uppci = sprintf("%.2f", round(X97.5.., 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values as 3-decimal strings; values that print as "0.000" become "<0.001".
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclusttreatedurban2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# One row per model term: formatted odds ratio plus p-value.
joint <- left_join(treated, results_treated, by = "X")

# Filler rows for the output table: a "1 (Reference)" row and an all-blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- rep(" ", 3)

# At each position the reference row is inserted first, then the blank row at
# the same RowNum. Positions are handled from the bottom of the table upwards.
# (An additional insertion at RowNum = 23 was disabled in the original.)
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR treated urban weightsweights.csv")
 





# Rural controlled ----
# Weighted binomial (logit) glm of diabetic_controlled_dbl, fitted on the rural data.
multiv_feclustcontrolled_rural <- glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Exponentiated coefficients with confint.default() limits, saved as CSV.
controlled_rural <- exp(cbind(
  OR = coef(multiv_feclustcontrolled_rural),
  confint.default(multiv_feclustcontrolled_rural)
))
write.csv(controlled_rural, "OR controlled ruralweights.csv")

# Coefficient table of the fit, saved as CSV.
results_clustcontrolledruralcoeff <- summary.glm(multiv_feclustcontrolled_rural)$coefficients
write.csv(results_clustcontrolledruralcoeff, "resultsglmclustcontrolledrural2018_04_26weights.csv")

# NOTE(review): the CSVs above are written relative to the working directory
# but read back from an absolute path; this only round-trips when the working
# directory is that folder -- confirm.
# Read the odds ratios back and build one "OR (low-high)" string per term,
# each number printed with two decimals.
controlled <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR controlled ruralweights.csv") %>%
  mutate(
    OR = sprintf("%.2f", round(OR, 2)),
    lowci = sprintf("%.2f", round(X2.5.., 2)),
    uppci = sprintf("%.2f", round(X97.5.., 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values as 3-decimal strings; values that print as "0.000" become "<0.001".
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclustcontrolledrural2018_04_26weights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# One row per model term: formatted odds ratio plus p-value.
joint <- left_join(controlled, results_treated, by = "X")

# Filler rows for the output table: a "1 (Reference)" row and an all-blank row.
age <- c(" ", "1 (Reference)", " ")
noref <- rep(" ", 3)

# At each position the reference row is inserted first, then the blank row at
# the same RowNum. Positions are handled from the bottom of the table upwards.
# (An additional insertion at RowNum = 23 was disabled in the original.)
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR controlled rural weightsweights.csv")
 








# Urban, controlled: weighted logistic regression of diabetic_controlled_dbl on
# rcs(age, 5), wealth_quintile_rurb, educatnames, married, sex and d_id, fitted
# on dhs_nomiss_diabetic_only_urban with weights p_wt_new.
# NOTE(review): despite the "feclust" name this is a plain glm(); the intervals
# below come from confint.default() (Wald, model-based SEs) and no cluster
# adjustment is applied in this block.
multiv_feclustcontrolled_urban <- glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Odds ratios with 95% Wald intervals (exponentiated coefficients).
controlled_urban <- exp(cbind(
  OR = coef(multiv_feclustcontrolled_urban),
  confint.default(multiv_feclustcontrolled_urban)
))

# Both intermediate tables are round-tripped through CSV so that coefficient
# names come back as column `X` and "2.5 %" / "97.5 %" / "Pr(>|z|)" come back
# as the syntactic names X2.5.. / X97.5.. / Pr...z.. used below.
or_csv <- "OR controlled urbanweights.csv"
coef_csv <- "resultsglmclustcontrolledurban2018_04_26weights.csv"
write.csv(controlled_urban, or_csv)

#results_clustcontrolledurban <-summary(multiv_feclustcontrolled_urban)
results_clustcontrolledurbancoeff <-
  summary.glm(multiv_feclustcontrolled_urban)$coefficients
write.csv(results_clustcontrolledurbancoeff, coef_csv)

# Read back from the path just written (not a hard-coded home-directory path),
# so the formatted table always reflects this run whatever the working
# directory is.
controlled <- read.csv(or_csv) %>%
  dplyr::rename(lowci = "X2.5..", uppci = "X97.5..") %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    # display string of the form "OR (low-high)"
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values to three decimals; anything that prints as 0.000 is shown as <0.001.
results_treated <- read.csv(coef_csv) %>%
  mutate(p_Value = sprintf("%.3f", round(Pr...z.., 3))) %>%
  mutate(p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)) %>%
  dplyr::select(X, p_Value)

joint <- left_join(controlled, results_treated, by = "X")

# Filler rows for the table layout: `age` carries the "1 (Reference)" label,
# `noref` is blank (names kept as used throughout the file).
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert from the bottom of the table upwards so that earlier insertions do not
# shift the later target positions. (Rows at position 23 were inserted in an
# earlier version and are disabled.)
# NOTE(review): positions are hard-coded -- confirm they match this model's
# coefficient order. InsertRow() is not defined in this section; presumably
# DataCombine::InsertRow, loaded elsewhere -- TODO confirm.
for (row_num in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_num)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_num)
}

write.csv(joint, "OR controlled urban weightsweights.csv")
 






```











```{r regressions with WEIGHTS urban and rural NOT separated and Interaction term age}
library(clusterSEs)




# Aware, pooled urban + rural, with an age-by-urban_lab interaction: weighted
# logistic regression of diabetic_aware_dbl fitted on dhs_nomiss_diabetic_only
# with weights p_wt_new.
# NOTE(review): despite the "feclust" name this is a plain glm(); the intervals
# below come from confint.default() (Wald, model-based SEs) and no cluster
# adjustment is applied in this block.
multiv_feclustaware <- glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + urban_lab + rcs(age, 5):urban_lab + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

# Odds ratios with 95% Wald intervals (exponentiated coefficients).
aware <- exp(cbind(
  OR = coef(multiv_feclustaware),
  confint.default(multiv_feclustaware)
))

# Both intermediate tables are round-tripped through CSV so that coefficient
# names come back as column `X` and "2.5 %" / "97.5 %" / "Pr(>|z|)" come back
# as the syntactic names X2.5.. / X97.5.. / Pr...z.. used below.
or_csv <- "OR aware weightsrural and urbaninteraction_age.csv"
coef_csv <- "resultsglmclustaware2018_04_26weightsrural and urbaninteraction_age.csv"
write.csv(aware, or_csv)

# Disabled alternative: cluster-bootstrap intervals via clusterSEs.
#CI_multiv_feclustaware <- cluster.bs.glm(multiv_feclustaware, dhs_nomiss_diabetic_only, ~d_id, report=T )
#write.csv(CI_multiv_feclustaware, "CI  awareweightsrural and urbaninteraction_age.csv")

#results_clustaware <-summary(multiv_feclustaware)
results_clustawarecoeff <- summary.glm(multiv_feclustaware)$coefficients
write.csv(results_clustawarecoeff, coef_csv)

# Read back from the path just written (not a hard-coded home-directory path),
# so the formatted table always reflects this run whatever the working
# directory is.
aware <- read.csv(or_csv) %>%
  dplyr::rename(lowci = "X2.5..", uppci = "X97.5..") %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    # display string of the form "OR (low-high)"
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values to three decimals; anything that prints as 0.000 is shown as <0.001.
results_treated <- read.csv(coef_csv) %>%
  mutate(p_Value = sprintf("%.3f", round(Pr...z.., 3))) %>%
  mutate(p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = "X")

# Filler rows for the table layout: `age` carries the "1 (Reference)" label,
# `noref` is blank (names kept as used throughout the file).
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert from the bottom of the table upwards so that earlier insertions do not
# shift the later target positions. (Rows at position 23 were inserted in an
# earlier version and are disabled.)
# NOTE(review): positions are hard-coded -- confirm they still match the
# coefficient order of this interaction model. InsertRow() is not defined in
# this section; presumably DataCombine::InsertRow -- TODO confirm.
for (row_num in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_num)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_num)
}

write.csv(joint, "OR aware  weightsweightsrural and urbaninteraction_age.csv")
 





# Treated, pooled urban + rural, with an age-by-urban_lab interaction: weighted
# logistic regression of diabetic_treated_dbl fitted on
# dhs_nomiss_diabetic_only with weights p_wt_new.
# NOTE(review): despite the "feclust" name this is a plain glm(); the intervals
# below come from confint.default() (Wald, model-based SEs) and no cluster
# adjustment is applied in this block.
multiv_feclusttreated <- glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + urban_lab + rcs(age, 5):urban_lab + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

# Odds ratios with 95% Wald intervals (exponentiated coefficients).
treated <- exp(cbind(
  OR = coef(multiv_feclusttreated),
  confint.default(multiv_feclusttreated)
))

# Both intermediate tables are round-tripped through CSV so that coefficient
# names come back as column `X` and "2.5 %" / "97.5 %" / "Pr(>|z|)" come back
# as the syntactic names X2.5.. / X97.5.. / Pr...z.. used below.
or_csv <- "OR treated weightsrural and urbaninteraction_age.csv"
coef_csv <- "resultsglmclusttreated2018_04_26weightsrural and urbaninteraction_age.csv"
write.csv(treated, or_csv)

#results_clusttreated <-summary(multiv_feclusttreated)
results_clusttreatedcoeff <- summary.glm(multiv_feclusttreated)$coefficients
write.csv(results_clusttreatedcoeff, coef_csv)

# Read back from the path just written (not a hard-coded home-directory path),
# so the formatted table always reflects this run whatever the working
# directory is.
treated <- read.csv(or_csv) %>%
  dplyr::rename(lowci = "X2.5..", uppci = "X97.5..") %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    # display string of the form "OR (low-high)"
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values to three decimals; anything that prints as 0.000 is shown as <0.001.
results_treated <- read.csv(coef_csv) %>%
  mutate(p_Value = sprintf("%.3f", round(Pr...z.., 3))) %>%
  mutate(p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)) %>%
  dplyr::select(X, p_Value)

joint <- left_join(treated, results_treated, by = "X")

# Filler rows for the table layout: `age` carries the "1 (Reference)" label,
# `noref` is blank (names kept as used throughout the file).
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert from the bottom of the table upwards so that earlier insertions do not
# shift the later target positions. (Rows at position 23 were inserted in an
# earlier version and are disabled.)
# NOTE(review): positions are hard-coded -- confirm they still match the
# coefficient order of this interaction model. InsertRow() is not defined in
# this section; presumably DataCombine::InsertRow -- TODO confirm.
for (row_num in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_num)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_num)
}

write.csv(joint, "OR treated  weightsweightsrural and urbaninteraction_age.csv")
 






# Controlled, pooled urban + rural, with an age-by-urban_lab interaction:
# weighted logistic regression of diabetic_controlled_dbl fitted on
# dhs_nomiss_diabetic_only with weights p_wt_new.
# NOTE(review): despite the "feclust" name this is a plain glm(); the intervals
# below come from confint.default() (Wald, model-based SEs) and no cluster
# adjustment is applied in this block.
multiv_feclustcontrolled <- glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + urban_lab + rcs(age, 5):urban_lab + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

# Odds ratios with 95% Wald intervals (exponentiated coefficients).
controlled <- exp(cbind(
  OR = coef(multiv_feclustcontrolled),
  confint.default(multiv_feclustcontrolled)
))

# Both intermediate tables are round-tripped through CSV so that coefficient
# names come back as column `X` and "2.5 %" / "97.5 %" / "Pr(>|z|)" come back
# as the syntactic names X2.5.. / X97.5.. / Pr...z.. used below.
or_csv <- "OR controlled weightsrural and urbaninteraction_age.csv"
coef_csv <- "resultsglmclustcontrolled2018_04_26weightsrural and urbaninteraction_age.csv"
write.csv(controlled, or_csv)

#results_clustcontrolled <-summary(multiv_feclustcontrolled)
results_clustcontrolledcoeff <-
  summary.glm(multiv_feclustcontrolled)$coefficients
write.csv(results_clustcontrolledcoeff, coef_csv)

# Read back from the path just written (not a hard-coded home-directory path),
# so the formatted table always reflects this run whatever the working
# directory is.
controlled <- read.csv(or_csv) %>%
  dplyr::rename(lowci = "X2.5..", uppci = "X97.5..") %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    # display string of the form "OR (low-high)"
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values to three decimals; anything that prints as 0.000 is shown as <0.001.
results_treated <- read.csv(coef_csv) %>%
  mutate(p_Value = sprintf("%.3f", round(Pr...z.., 3))) %>%
  mutate(p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)) %>%
  dplyr::select(X, p_Value)

joint <- left_join(controlled, results_treated, by = "X")

# Filler rows for the table layout: `age` carries the "1 (Reference)" label,
# `noref` is blank (names kept as used throughout the file).
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert from the bottom of the table upwards so that earlier insertions do not
# shift the later target positions. (Rows at position 23 were inserted in an
# earlier version and are disabled.)
# NOTE(review): positions are hard-coded -- confirm they still match the
# coefficient order of this interaction model. InsertRow() is not defined in
# this section; presumably DataCombine::InsertRow -- TODO confirm.
for (row_num in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_num)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_num)
}

write.csv(joint, "OR controlled  weightsweightsrural and urbaninteraction_age.csv")
 






```




```{r regressions with WEIGHTS urban and rural NOT separated and Interaction term wealth}
library(clusterSEs)




# Aware, pooled urban + rural, with a wealth_quintile_rurb-by-urban_lab
# interaction: weighted logistic regression of diabetic_aware_dbl fitted on
# dhs_nomiss_diabetic_only with weights p_wt_new.
# NOTE(review): despite the "feclust" name this is a plain glm(); the intervals
# below come from confint.default() (Wald, model-based SEs) and no cluster
# adjustment is applied in this block.
multiv_feclustaware <- glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + urban_lab +
    wealth_quintile_rurb:urban_lab + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

# Odds ratios with 95% Wald intervals (exponentiated coefficients).
aware <- exp(cbind(
  OR = coef(multiv_feclustaware),
  confint.default(multiv_feclustaware)
))

# Both intermediate tables are round-tripped through CSV so that coefficient
# names come back as column `X` and "2.5 %" / "97.5 %" / "Pr(>|z|)" come back
# as the syntactic names X2.5.. / X97.5.. / Pr...z.. used below.
or_csv <- "OR aware weightsrural and urbaninteraction_wealth.csv"
coef_csv <- "resultsglmclustaware2018_04_26weightsrural and urbaninteraction_wealth.csv"
write.csv(aware, or_csv)

# Disabled alternative: cluster-bootstrap intervals via clusterSEs.
#CI_multiv_feclustaware <- cluster.bs.glm(multiv_feclustaware, dhs_nomiss_diabetic_only, ~d_id, report=T )
#write.csv(CI_multiv_feclustaware, "CI  awareweightsrural and urbaninteraction_wealth.csv")

#results_clustaware <-summary(multiv_feclustaware)
results_clustawarecoeff <- summary.glm(multiv_feclustaware)$coefficients
write.csv(results_clustawarecoeff, coef_csv)

# Read back from the path just written (not a hard-coded home-directory path),
# so the formatted table always reflects this run whatever the working
# directory is.
aware <- read.csv(or_csv) %>%
  dplyr::rename(lowci = "X2.5..", uppci = "X97.5..") %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    # display string of the form "OR (low-high)"
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values to three decimals; anything that prints as 0.000 is shown as <0.001.
results_treated <- read.csv(coef_csv) %>%
  mutate(p_Value = sprintf("%.3f", round(Pr...z.., 3))) %>%
  mutate(p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = "X")

# Filler rows for the table layout: `age` carries the "1 (Reference)" label,
# `noref` is blank (names kept as used throughout the file).
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert from the bottom of the table upwards so that earlier insertions do not
# shift the later target positions. (Rows at position 23 were inserted in an
# earlier version and are disabled.)
# NOTE(review): positions are hard-coded -- confirm they still match the
# coefficient order of this interaction model. InsertRow() is not defined in
# this section; presumably DataCombine::InsertRow -- TODO confirm.
for (row_num in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_num)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_num)
}

write.csv(joint, "OR aware  weightsweightsrural and urbaninteraction_wealth.csv")
 





# Treated, pooled urban + rural, with a wealth_quintile_rurb-by-urban_lab
# interaction: weighted logistic regression of diabetic_treated_dbl fitted on
# dhs_nomiss_diabetic_only with weights p_wt_new.
# NOTE(review): despite the "feclust" name this is a plain glm(); the intervals
# below come from confint.default() (Wald, model-based SEs) and no cluster
# adjustment is applied in this block.
multiv_feclusttreated <- glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + urban_lab +
    wealth_quintile_rurb:urban_lab + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

# Odds ratios with 95% Wald intervals (exponentiated coefficients).
treated <- exp(cbind(
  OR = coef(multiv_feclusttreated),
  confint.default(multiv_feclusttreated)
))

# Both intermediate tables are round-tripped through CSV so that coefficient
# names come back as column `X` and "2.5 %" / "97.5 %" / "Pr(>|z|)" come back
# as the syntactic names X2.5.. / X97.5.. / Pr...z.. used below.
or_csv <- "OR treated weightsrural and urbaninteraction_wealth.csv"
coef_csv <- "resultsglmclusttreated2018_04_26weightsrural and urbaninteraction_wealth.csv"
write.csv(treated, or_csv)

#results_clusttreated <-summary(multiv_feclusttreated)
results_clusttreatedcoeff <- summary.glm(multiv_feclusttreated)$coefficients
write.csv(results_clusttreatedcoeff, coef_csv)

# Read back from the path just written (not a hard-coded home-directory path),
# so the formatted table always reflects this run whatever the working
# directory is.
treated <- read.csv(or_csv) %>%
  dplyr::rename(lowci = "X2.5..", uppci = "X97.5..") %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    # display string of the form "OR (low-high)"
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values to three decimals; anything that prints as 0.000 is shown as <0.001.
results_treated <- read.csv(coef_csv) %>%
  mutate(p_Value = sprintf("%.3f", round(Pr...z.., 3))) %>%
  mutate(p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)) %>%
  dplyr::select(X, p_Value)

joint <- left_join(treated, results_treated, by = "X")

# Filler rows for the table layout: `age` carries the "1 (Reference)" label,
# `noref` is blank (names kept as used throughout the file).
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert from the bottom of the table upwards so that earlier insertions do not
# shift the later target positions. (Rows at position 23 were inserted in an
# earlier version and are disabled.)
# NOTE(review): positions are hard-coded -- confirm they still match the
# coefficient order of this interaction model. InsertRow() is not defined in
# this section; presumably DataCombine::InsertRow -- TODO confirm.
for (row_num in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_num)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_num)
}

write.csv(joint, "OR treated  weightsweightsrural and urbaninteraction_wealth.csv")
 






# Controlled, pooled urban + rural, with a wealth_quintile_rurb-by-urban_lab
# interaction: weighted logistic regression of diabetic_controlled_dbl fitted
# on dhs_nomiss_diabetic_only with weights p_wt_new.
# NOTE(review): despite the "feclust" name this is a plain glm(); the intervals
# below come from confint.default() (Wald, model-based SEs) and no cluster
# adjustment is applied in this block.
multiv_feclustcontrolled <- glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + urban_lab +
    wealth_quintile_rurb:urban_lab + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

# Odds ratios with 95% Wald intervals (exponentiated coefficients).
controlled <- exp(cbind(
  OR = coef(multiv_feclustcontrolled),
  confint.default(multiv_feclustcontrolled)
))

# Both intermediate tables are round-tripped through CSV so that coefficient
# names come back as column `X` and "2.5 %" / "97.5 %" / "Pr(>|z|)" come back
# as the syntactic names X2.5.. / X97.5.. / Pr...z.. used below.
or_csv <- "OR controlled weightsrural and urbaninteraction_wealth.csv"
coef_csv <- "resultsglmclustcontrolled2018_04_26weightsrural and urbaninteraction_wealth.csv"
write.csv(controlled, or_csv)

#results_clustcontrolled <-summary(multiv_feclustcontrolled)
results_clustcontrolledcoeff <-
  summary.glm(multiv_feclustcontrolled)$coefficients
write.csv(results_clustcontrolledcoeff, coef_csv)

# Read back from the path just written (not a hard-coded home-directory path),
# so the formatted table always reflects this run whatever the working
# directory is.
controlled <- read.csv(or_csv) %>%
  dplyr::rename(lowci = "X2.5..", uppci = "X97.5..") %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    # display string of the form "OR (low-high)"
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values to three decimals; anything that prints as 0.000 is shown as <0.001.
results_treated <- read.csv(coef_csv) %>%
  mutate(p_Value = sprintf("%.3f", round(Pr...z.., 3))) %>%
  mutate(p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)) %>%
  dplyr::select(X, p_Value)

joint <- left_join(controlled, results_treated, by = "X")

# Filler rows for the table layout: `age` carries the "1 (Reference)" label,
# `noref` is blank (names kept as used throughout the file).
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert from the bottom of the table upwards so that earlier insertions do not
# shift the later target positions. (Rows at position 23 were inserted in an
# earlier version and are disabled.)
# NOTE(review): positions are hard-coded -- confirm they still match the
# coefficient order of this interaction model. InsertRow() is not defined in
# this section; presumably DataCombine::InsertRow -- TODO confirm.
for (row_num in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_num)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_num)
}

write.csv(joint, "OR controlled  weightsweightsrural and urbaninteraction_wealth.csv")
 






```



```{r regressions with WEIGHTS urban and rural NOT separated and Interaction term education}
library(clusterSEs)




# Aware, pooled urban + rural, with an educatnames-by-urban_lab interaction:
# weighted logistic regression of diabetic_aware_dbl fitted on
# dhs_nomiss_diabetic_only with weights p_wt_new.
# NOTE(review): despite the "feclust" name this is a plain glm(); the intervals
# below come from confint.default() (Wald, model-based SEs) and no cluster
# adjustment is applied in this block.
multiv_feclustaware <- glm(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + urban_lab + educatnames:urban_lab + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

# Odds ratios with 95% Wald intervals (exponentiated coefficients).
aware <- exp(cbind(
  OR = coef(multiv_feclustaware),
  confint.default(multiv_feclustaware)
))

# Both intermediate tables are round-tripped through CSV so that coefficient
# names come back as column `X` and "2.5 %" / "97.5 %" / "Pr(>|z|)" come back
# as the syntactic names X2.5.. / X97.5.. / Pr...z.. used below.
or_csv <- "OR aware weightsrural and urbaninteraction_education.csv"
coef_csv <- "resultsglmclustaware2018_04_26weightsrural and urbaninteraction_education.csv"
write.csv(aware, or_csv)

# Disabled alternative: cluster-bootstrap intervals via clusterSEs.
#CI_multiv_feclustaware <- cluster.bs.glm(multiv_feclustaware, dhs_nomiss_diabetic_only, ~d_id, report=T )
#write.csv(CI_multiv_feclustaware, "CI  awareweightsrural and urbaninteraction_education.csv")

#results_clustaware <-summary(multiv_feclustaware)
results_clustawarecoeff <- summary.glm(multiv_feclustaware)$coefficients
write.csv(results_clustawarecoeff, coef_csv)

# Read back from the path just written (not a hard-coded home-directory path),
# so the formatted table always reflects this run whatever the working
# directory is.
aware <- read.csv(or_csv) %>%
  dplyr::rename(lowci = "X2.5..", uppci = "X97.5..") %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    # display string of the form "OR (low-high)"
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values to three decimals; anything that prints as 0.000 is shown as <0.001.
results_treated <- read.csv(coef_csv) %>%
  mutate(p_Value = sprintf("%.3f", round(Pr...z.., 3))) %>%
  mutate(p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = "X")

# Filler rows for the table layout: `age` carries the "1 (Reference)" label,
# `noref` is blank (names kept as used throughout the file).
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert from the bottom of the table upwards so that earlier insertions do not
# shift the later target positions. (Rows at position 23 were inserted in an
# earlier version and are disabled.)
# NOTE(review): positions are hard-coded -- confirm they still match the
# coefficient order of this interaction model. InsertRow() is not defined in
# this section; presumably DataCombine::InsertRow -- TODO confirm.
for (row_num in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_num)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_num)
}

write.csv(joint, "OR aware  weightsweightsrural and urbaninteraction_education.csv")
 





# Treated, pooled urban + rural, with an educatnames-by-urban_lab interaction:
# weighted logistic regression of diabetic_treated_dbl fitted on
# dhs_nomiss_diabetic_only with weights p_wt_new.
# NOTE(review): despite the "feclust" name this is a plain glm(); the intervals
# below come from confint.default() (Wald, model-based SEs) and no cluster
# adjustment is applied in this block.
multiv_feclusttreated <- glm(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + urban_lab + educatnames:urban_lab + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

# Odds ratios with 95% Wald intervals (exponentiated coefficients).
treated <- exp(cbind(
  OR = coef(multiv_feclusttreated),
  confint.default(multiv_feclusttreated)
))

# Both intermediate tables are round-tripped through CSV so that coefficient
# names come back as column `X` and "2.5 %" / "97.5 %" / "Pr(>|z|)" come back
# as the syntactic names X2.5.. / X97.5.. / Pr...z.. used below.
or_csv <- "OR treated weightsrural and urbaninteraction_education.csv"
coef_csv <- "resultsglmclusttreated2018_04_26weightsrural and urbaninteraction_education.csv"
write.csv(treated, or_csv)

#results_clusttreated <-summary(multiv_feclusttreated)
results_clusttreatedcoeff <- summary.glm(multiv_feclusttreated)$coefficients
write.csv(results_clusttreatedcoeff, coef_csv)

# Read back from the path just written (not a hard-coded home-directory path),
# so the formatted table always reflects this run whatever the working
# directory is.
treated <- read.csv(or_csv) %>%
  dplyr::rename(lowci = "X2.5..", uppci = "X97.5..") %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    # display string of the form "OR (low-high)"
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values to three decimals; anything that prints as 0.000 is shown as <0.001.
results_treated <- read.csv(coef_csv) %>%
  mutate(p_Value = sprintf("%.3f", round(Pr...z.., 3))) %>%
  mutate(p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)) %>%
  dplyr::select(X, p_Value)

joint <- left_join(treated, results_treated, by = "X")

# Filler rows for the table layout: `age` carries the "1 (Reference)" label,
# `noref` is blank (names kept as used throughout the file).
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert from the bottom of the table upwards so that earlier insertions do not
# shift the later target positions. (Rows at position 23 were inserted in an
# earlier version and are disabled.)
# NOTE(review): positions are hard-coded -- confirm they still match the
# coefficient order of this interaction model. InsertRow() is not defined in
# this section; presumably DataCombine::InsertRow -- TODO confirm.
for (row_num in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_num)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_num)
}

write.csv(joint, "OR treated  weightsweightsrural and urbaninteraction_education.csv")
 






# Controlled, pooled urban + rural, with an educatnames-by-urban_lab
# interaction: weighted logistic regression of diabetic_controlled_dbl fitted
# on dhs_nomiss_diabetic_only with weights p_wt_new.
# NOTE(review): despite the "feclust" name this is a plain glm(); the intervals
# below come from confint.default() (Wald, model-based SEs) and no cluster
# adjustment is applied in this block.
multiv_feclustcontrolled <- glm(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + urban_lab + educatnames:urban_lab + d_id,
  weights = p_wt_new,
  data = dhs_nomiss_diabetic_only,
  family = binomial(link = "logit")
)

# Odds ratios with 95% Wald intervals (exponentiated coefficients).
controlled <- exp(cbind(
  OR = coef(multiv_feclustcontrolled),
  confint.default(multiv_feclustcontrolled)
))

# Both intermediate tables are round-tripped through CSV so that coefficient
# names come back as column `X` and "2.5 %" / "97.5 %" / "Pr(>|z|)" come back
# as the syntactic names X2.5.. / X97.5.. / Pr...z.. used below.
or_csv <- "OR controlled weightsrural and urbaninteraction_education.csv"
coef_csv <- "resultsglmclustcontrolled2018_04_26weightsrural and urbaninteraction_education.csv"
write.csv(controlled, or_csv)

#results_clustcontrolled <-summary(multiv_feclustcontrolled)
results_clustcontrolledcoeff <-
  summary.glm(multiv_feclustcontrolled)$coefficients
write.csv(results_clustcontrolledcoeff, coef_csv)

# Read back from the path just written (not a hard-coded home-directory path),
# so the formatted table always reflects this run whatever the working
# directory is.
controlled <- read.csv(or_csv) %>%
  dplyr::rename(lowci = "X2.5..", uppci = "X97.5..") %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    # display string of the form "OR (low-high)"
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# p-values to three decimals; anything that prints as 0.000 is shown as <0.001.
results_treated <- read.csv(coef_csv) %>%
  mutate(p_Value = sprintf("%.3f", round(Pr...z.., 3))) %>%
  mutate(p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)) %>%
  dplyr::select(X, p_Value)

joint <- left_join(controlled, results_treated, by = "X")

# Filler rows for the table layout: `age` carries the "1 (Reference)" label,
# `noref` is blank (names kept as used throughout the file).
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert from the bottom of the table upwards so that earlier insertions do not
# shift the later target positions. (Rows at position 23 were inserted in an
# earlier version and are disabled.)
# NOTE(review): positions are hard-coded -- confirm they still match the
# coefficient order of this interaction model. InsertRow() is not defined in
# this section; presumably DataCombine::InsertRow -- TODO confirm.
for (row_num in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_num)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_num)
}

write.csv(joint, "OR controlled  weightsweightsrural and urbaninteraction_education.csv")
 






```



```{r regression no weights urban rural separately}


#clustered regressions for urban and rural separately NO WEIGHTS#


#Rural diabetes

#multiv_feclust_diab_rural <- glm.cluster(formula = ex_diab_broad_ind ~ rcs(age,5) + wealth_quintile_rurb + educatnames + married +sex + #d_id, cluster="psu", data=dhs_nomiss_rural,  family=binomial(link="logit"))
#exp(cbind(OR = coef(multiv_feclust_diab_rural), confint(multiv_feclust_diab_rural))) 
#diab_rural <- exp(cbind(OR = coef(multiv_feclust_diab_rural), confint(multiv_feclust_diab_rural))) 
#write.csv(diab_rural, "OR diab ruralnoweights.csv")



#results_clustdiabrural <-summary(multiv_feclust_diab_rural)


#results_clustdiabrural_CI <- as.data.frame(results_clustdiabrural)

#write.csv(results_clustdiabrural, "resultsglmclustdiabrural2018_03_04noweights.csv")

#Urban diabetes

#multiv_feclust_diab_urban <- glm.cluster(formula = ex_diab_broad_ind ~  rcs(age,5) + wealth_quintile_rurb + educatnames + married +sex + d_id, cluster="psu", data=dhs_nomiss_urban, family=binomial(link="logit"))
#exp(cbind(OR = coef(multiv_feclust_diab_urban), confint(multiv_feclust_diab_urban))) 
#diab_urban <- exp(cbind(OR = coef(multiv_feclust_diab_urban), confint(multiv_feclust_diab_urban))) 
#write.csv(diab_urban, "OR diab urbannoweights.csv")





#results_clustdiaburban <-summary(multiv_feclust_diab_urban)
#write.csv(results_clustdiaburban, "resultsglmclustdiaburban2018_03_04noweights.csv")


#Rural aware




# Rural, unweighted: logistic regression of diabetes awareness with standard
# errors clustered on "psu", followed by a formatted odds-ratio table.
multiv_feclustaware_rural <- glm.cluster(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
aware_rural <- exp(cbind(
  OR = coef(multiv_feclustaware_rural),
  confint(multiv_feclustaware_rural)
))
write.csv(aware_rural, "OR aware ruralnoweights.csv")

#save(file="glm.clusterawarerural2018_03_04",multiv_feclustaware_rural)
# Coefficient table; its p-value column is read back below.
results_clustawarerural <- summary(multiv_feclustaware_rural)
write.csv(results_clustawarerural, "resultsglmclustawarerural2018_04_22noweights.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
aware <- read.csv("OR aware ruralnoweights.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_treated <- read.csv("resultsglmclustawarerural2018_04_22noweights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): positions are hard-coded; confirm they match this model's
# coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR aware rural noweightsnoweights.csv")
 



#Urban aware
# Urban, unweighted: logistic regression of diabetes awareness with standard
# errors clustered on "psu", followed by a formatted odds-ratio table.
multiv_feclustaware_urban <- glm.cluster(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
aware_urban <- exp(cbind(
  OR = coef(multiv_feclustaware_urban),
  confint(multiv_feclustaware_urban)
))
write.csv(aware_urban, "OR aware urbannoweights.csv")

# Coefficient table; its p-value column is read back below.
results_clustawareurban <- summary(multiv_feclustaware_urban)
write.csv(results_clustawareurban, "resultsglmclustawareurban2018_04_22noweights.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
aware <- read.csv("OR aware urbannoweights.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_treated <- read.csv("resultsglmclustawareurban2018_04_22noweights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): positions are hard-coded; confirm they match this model's
# coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR aware urban noweightsnoweights.csv")
 




#rural treated

# Rural, unweighted: logistic regression of being treated for diabetes with
# standard errors clustered on "psu", followed by a formatted odds-ratio table.
multiv_feclusttreated_rural <- glm.cluster(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
treated_rural <- exp(cbind(
  OR = coef(multiv_feclusttreated_rural),
  confint(multiv_feclusttreated_rural)
))
write.csv(treated_rural, "OR treated ruralnoweights.csv")

#save(file="glm.clustertreatedrural2018_03_04",multiv_feclusttreated_rural)
# Coefficient table; its p-value column is read back below.
results_clusttreatedrural <- summary(multiv_feclusttreated_rural)
write.csv(results_clusttreatedrural, "resultsglmclusttreatedrural2018_04_22noweights.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
treated <- read.csv("OR treated ruralnoweights.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_treated <- read.csv("resultsglmclusttreatedrural2018_04_22noweights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(treated, results_treated, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): positions are hard-coded; confirm they match this model's
# coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR treated rural noweightsnoweights.csv")
 



#urban treated

# Urban, unweighted: logistic regression of being treated for diabetes with
# standard errors clustered on "psu", followed by a formatted odds-ratio table.
multiv_feclusttreated_urban <- glm.cluster(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
treated_urban <- exp(cbind(
  OR = coef(multiv_feclusttreated_urban),
  confint(multiv_feclusttreated_urban)
))
write.csv(treated_urban, "OR treated urbannoweights.csv")

#save(file="glm.clustertreatedurban2018_03_04",multiv_feclusttreated_urban)
# Coefficient table; its p-value column is read back below.
results_clusttreatedurban <- summary(multiv_feclusttreated_urban)
write.csv(results_clusttreatedurban, "resultsglmclusttreatedurban2018_04_22noweights.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
treated <- read.csv("OR treated urbannoweights.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_treated <- read.csv("resultsglmclusttreatedurban2018_04_22noweights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(treated, results_treated, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): positions are hard-coded; confirm they match this model's
# coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR treated urban noweightsnoweights.csv")
 



#rural controlled


# Rural, unweighted: logistic regression of having controlled diabetes with
# standard errors clustered on "psu", followed by a formatted odds-ratio table.
multiv_feclustcontrolled_rural <- glm.cluster(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_rural,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
controlled_rural <- exp(cbind(
  OR = coef(multiv_feclustcontrolled_rural),
  confint(multiv_feclustcontrolled_rural)
))
write.csv(controlled_rural, "OR controlled ruralnoweights.csv")

#save(file="glm.clustercontrolledrural2018_03_04",multiv_feclustcontrolled_rural)
# Coefficient table; its p-value column is read back below.
results_clustcontrolledrural <- summary(multiv_feclustcontrolled_rural)
write.csv(results_clustcontrolledrural, "resultsglmclustcontrolledrural2018_04_22noweights.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
controlled <- read.csv("OR controlled ruralnoweights.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_controlled <- read.csv("resultsglmclustcontrolledrural2018_04_22noweights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(controlled, results_controlled, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): positions are hard-coded; confirm they match this model's
# coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR controlled rural noweightsnoweights.csv")
 




#urban controlled

# Urban, unweighted: logistic regression of having controlled diabetes with
# standard errors clustered on "psu", followed by a formatted odds-ratio table.
multiv_feclustcontrolled_urban <- glm.cluster(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + sex + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_urban,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
controlled_urban <- exp(cbind(
  OR = coef(multiv_feclustcontrolled_urban),
  confint(multiv_feclustcontrolled_urban)
))
write.csv(controlled_urban, "OR controlled urbannoweights.csv")

#save(file="glm.clustercontrolledurban2018_03_04",multiv_feclustcontrolled_urban)
# Coefficient table; its p-value column is read back below.
results_clustcontrolledurban <- summary(multiv_feclustcontrolled_urban)
write.csv(results_clustcontrolledurban, "resultsglmclustcontrolledurban2018_04_22noweights.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
controlled <- read.csv("OR controlled urbannoweights.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_controlled <- read.csv("resultsglmclustcontrolledurban2018_04_22noweights.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(controlled, results_controlled, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): positions are hard-coded; confirm they match this model's
# coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR controlled urban noweightsnoweights.csv")
 


```

```{r regression with clustering urban rural separately women}


########Clustered regressions rural/urban separately AND MEN AND WOMEN SEPARATE

####WOMEN

#rural_women aware

# Rural women: logistic regression of diabetes awareness with standard errors
# clustered on "psu" (no sex term), followed by a formatted odds-ratio table.
multiv_feclustaware_rural_women <- glm.cluster(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_rural_women,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
aware_rural_women <- exp(cbind(
  OR = coef(multiv_feclustaware_rural_women),
  confint(multiv_feclustaware_rural_women)
))
write.csv(aware_rural_women, "OR aware rural_women.csv")

#save(file="glm.clusterawarerural_women2018_03_04",multiv_feclustaware_rural_women)
# Coefficient table; its p-value column is read back below.
results_clustawarerural_women <- summary(multiv_feclustaware_rural_women)
write.csv(results_clustawarerural_women, "resultsglmclustawarerural_women2018_04_22.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
aware <- read.csv("OR aware rural_women.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_treated <- read.csv("resultsglmclustawarerural_women2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): these are the same hard-coded positions used for the models
# that include sex; this formula has no sex term, so confirm the rows still
# line up with the coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR aware rural womenwomen.csv")
 








#urban_women aware
# Urban women: logistic regression of diabetes awareness with standard errors
# clustered on "psu" (no sex term), followed by a formatted odds-ratio table.
multiv_feclustaware_urban_women <- glm.cluster(
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_urban_women,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
aware_urban_women <- exp(cbind(
  OR = coef(multiv_feclustaware_urban_women),
  confint(multiv_feclustaware_urban_women)
))
write.csv(aware_urban_women, "OR aware urban_women.csv")

#save(file="glm.clusterawareurban_women2018_03_04",multiv_feclustaware_urban_women)
# Coefficient table; its p-value column is read back below.
results_clustawareurban_women <- summary(multiv_feclustaware_urban_women)
write.csv(results_clustawareurban_women, "resultsglmclustawareurban_women2018_04_22.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
aware <- read.csv("OR aware urban_women.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_treated <- read.csv("resultsglmclustawareurban_women2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): these are the same hard-coded positions used for the models
# that include sex; this formula has no sex term, so confirm the rows still
# line up with the coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR aware urban womenwomen.csv")
 







#rural_women treated

# Rural women: logistic regression of being treated for diabetes with standard
# errors clustered on "psu" (no sex term), followed by a formatted odds-ratio
# table.
multiv_feclusttreated_rural_women <- glm.cluster(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_rural_women,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
treated_rural_women <- exp(cbind(
  OR = coef(multiv_feclusttreated_rural_women),
  confint(multiv_feclusttreated_rural_women)
))
write.csv(treated_rural_women, "OR treated rural_women.csv")

#save(file="glm.clustertreatedrural_women2018_03_04",multiv_feclusttreated_rural_women)
# Coefficient table; its p-value column is read back below.
results_clusttreatedrural_women <- summary(multiv_feclusttreated_rural_women)
write.csv(results_clusttreatedrural_women, "resultsglmclusttreatedrural_women2018_04_22.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
treated <- read.csv("OR treated rural_women.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_treated <- read.csv("resultsglmclusttreatedrural_women2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(treated, results_treated, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): these are the same hard-coded positions used for the models
# that include sex; this formula has no sex term, so confirm the rows still
# line up with the coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR treated rural womenwomen.csv")
 


#urban_women treated

# Urban women: logistic regression of being treated for diabetes with standard
# errors clustered on "psu" (no sex term), followed by a formatted odds-ratio
# table.
multiv_feclusttreated_urban_women <- glm.cluster(
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_urban_women,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
treated_urban_women <- exp(cbind(
  OR = coef(multiv_feclusttreated_urban_women),
  confint(multiv_feclusttreated_urban_women)
))
write.csv(treated_urban_women, "OR treated urban_women.csv")

#save(file="glm.clustertreatedurban_women2018_03_04",multiv_feclusttreated_urban_women)
# Coefficient table; its p-value column is read back below.
results_clusttreatedurban_women <- summary(multiv_feclusttreated_urban_women)
write.csv(results_clusttreatedurban_women, "resultsglmclusttreatedurban_women2018_04_22.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
treated <- read.csv("OR treated urban_women.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_treated <- read.csv("resultsglmclusttreatedurban_women2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(treated, results_treated, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): these are the same hard-coded positions used for the models
# that include sex; this formula has no sex term, so confirm the rows still
# line up with the coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR treated urban womenwomen.csv")
 


#rural_women controlled


# Rural women: logistic regression of having controlled diabetes with standard
# errors clustered on "psu" (no sex term), followed by a formatted odds-ratio
# table.
multiv_feclustcontrolled_rural_women <- glm.cluster(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_rural_women,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
controlled_rural_women <- exp(cbind(
  OR = coef(multiv_feclustcontrolled_rural_women),
  confint(multiv_feclustcontrolled_rural_women)
))
write.csv(controlled_rural_women, "OR controlled rural_women.csv")

#save(file="glm.clustercontrolledrural_women2018_03_04",multiv_feclustcontrolled_rural_women)
# Coefficient table; its p-value column is read back below.
results_clustcontrolledrural_women <- summary(multiv_feclustcontrolled_rural_women)
write.csv(results_clustcontrolledrural_women, "resultsglmclustcontrolledrural_women2018_04_22.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
controlled <- read.csv("OR controlled rural_women.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_treated <- read.csv("resultsglmclustcontrolledrural_women2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(controlled, results_treated, by = "X")

# Spacer rows for the printed table: at each position a blank row is followed
# by a "1 (Reference)" row. Positions run from the bottom up so that earlier
# insertions do not shift the later ones.
# NOTE(review): these are the same hard-coded positions used for the models
# that include sex; this formula has no sex term, so confirm the rows still
# line up with the coefficient order. An additional pair at row 23 is disabled.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")
joint <- Reduce(
  function(tbl, pos) {
    tbl <- InsertRow(tbl, NewRow = age, RowNum = pos)
    InsertRow(tbl, NewRow = noref, RowNum = pos)
  },
  c(21, 13, 9, 2),
  joint
)

write.csv(joint, "OR controlled rural womenwomen.csv")
 







#urban_women controlled

# Urban women: logistic regression of having controlled diabetes with standard
# errors clustered on "psu" (no sex term), followed by the odds-ratio and
# p-value columns for the printed table.
multiv_feclustcontrolled_urban_women <- glm.cluster(
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  data = dhs_nomiss_diabetic_only_urban_women,
  family = binomial(link = "logit")
)

# Odds ratios and confidence limits: exponentiated coefficients and confint().
controlled_urban_women <- exp(cbind(
  OR = coef(multiv_feclustcontrolled_urban_women),
  confint(multiv_feclustcontrolled_urban_women)
))
write.csv(controlled_urban_women, "OR controlled urban_women.csv")

#save(file="glm.clustercontrolledurban_women2018_03_04",multiv_feclustcontrolled_urban_women)
# Coefficient table; its p-value column is read back below.
results_clustcontrolledurban_women <- summary(multiv_feclustcontrolled_urban_women)
write.csv(results_clustcontrolledurban_women, "resultsglmclustcontrolledurban_women2018_04_22.csv")

# Read back the odds-ratio file just written. The path matches the write.csv()
# call above, so the table always reflects this run rather than a copy stored
# under a fixed home-directory path. After the CSV round trip the term names
# sit in column X and the confidence limits in columns X2.5.. and X97.5.. ;
# each row is formatted as "OR (lower-upper)" with two decimals.
controlled <- read.csv("OR controlled urban_women.csv") %>%
  mutate(rr = str_c(
    sprintf("%.2f", round(OR, 2)), " (",
    sprintf("%.2f", round(X2.5.., 2)), "-",
    sprintf("%.2f", round(X97.5.., 2)), ")"
  )) %>%
  dplyr::select(X, rr)

# p-values to three decimals; values that print as 0.000 are shown as <0.001.
results_treated <- read.csv("resultsglmclustcontrolledurban_women2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

# Attach the p-values to the odds-ratio rows by term name (column X).
joint <- left_join(controlled, results_treated, by = "X")

  
  
age <- c(" ", "1 (Reference)"," ")
noref <- c(" ", " "," ")


#joint <- InsertRow(joint, NewRow = age, RowNum = 23)
#joint <- InsertRow(joint, NewRow = noref, RowNum = 23)
joint <- InsertRow(joint, NewRow = age, RowNum = 21)
joint <- InsertRow(joint, NewRow = noref, RowNum = 21)
joint <- InsertRow(joint, NewRow = age, RowNum = 13)
joint <- InsertRow(joint, NewRow = noref, RowNum = 13)
joint <- InsertRow(joint, NewRow = age, RowNum = 9)
joint <- InsertRow(joint, NewRow = noref, RowNum = 9)
joint <- InsertRow(joint, NewRow = age, RowNum = 2)
joint <- InsertRow(joint, NewRow = noref, RowNum = 2)



 write.csv(joint, "OR controlled urban womenwomen.csv")
 



```

```{r regression with clusterin urban rural separately men}
# Men ----

# Rural men: awareness of diabetes ----

# Logistic regression via glm.cluster() with cluster = "psu".
multiv_feclustaware_rural_men <- glm.cluster(
  data = dhs_nomiss_diabetic_only_rural_men,
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  family = binomial(link = "logit")
)

# Exponentiated coefficients and confidence limits, exported as odds ratios.
aware_rural_men <- exp(cbind(
  OR = coef(multiv_feclustaware_rural_men),
  confint(multiv_feclustaware_rural_men)
))
write.csv(aware_rural_men, "OR aware rural_men.csv")

# save(file="glm.clusterawarerural_men2018_03_04",multiv_feclustaware_rural_men)
results_clustawarerural_men <- summary(multiv_feclustaware_rural_men)
write.csv(results_clustawarerural_men, "resultsglmclustawarerural_men2018_04_22.csv")

# NOTE(review): the CSVs above are written to the working directory but read
# back from an absolute path - confirm both resolve to the same files.
aware <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR aware rural_men.csv")
names(aware)[names(aware) == "X2.5.."] <- "lowci"
names(aware)[names(aware) == "X97.5.."] <- "uppci"

# Two-decimal strings, combined into an "OR (low-high)" label.
aware <- aware %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# Three-decimal p-values; anything that prints as 0.000 is reported as <0.001.
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclustawarerural_men2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = "X")

# Filler rows: `age` carries the "1 (Reference)" label, `noref` is blank.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert both filler rows at each fixed position, highest position first.
# InsertRow() is defined outside this chunk.
# joint <- InsertRow(joint, NewRow = age, RowNum = 23)
# joint <- InsertRow(joint, NewRow = noref, RowNum = 23)
for (row_pos in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_pos)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_pos)
}

write.csv(joint, "OR aware rural menmen.csv")
 




# Urban men: awareness of diabetes ----

# Logistic regression via glm.cluster() with cluster = "psu".
multiv_feclustaware_urban_men <- glm.cluster(
  data = dhs_nomiss_diabetic_only_urban_men,
  formula = diabetic_aware_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  family = binomial(link = "logit")
)

# Exponentiated coefficients and confidence limits, exported as odds ratios.
aware_urban_men <- exp(cbind(
  OR = coef(multiv_feclustaware_urban_men),
  confint(multiv_feclustaware_urban_men)
))
write.csv(aware_urban_men, "OR aware urban_men.csv")

# Summarise and export the model once. An earlier revision repeated these two
# statements verbatim, printing the summary and writing the same file twice.
# save(file="glm.clusterawareurban_men2018_03_04",multiv_feclustaware_urban_men)
results_clustawareurban_men <- summary(multiv_feclustaware_urban_men)
write.csv(results_clustawareurban_men, "resultsglmclustawareurban_men2018_04_22.csv")

# NOTE(review): the CSVs above are written to the working directory but read
# back from an absolute path - confirm both resolve to the same files.
aware <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR aware urban_men.csv")
names(aware)[names(aware) == "X2.5.."] <- "lowci"
names(aware)[names(aware) == "X97.5.."] <- "uppci"

# Two-decimal strings, combined into an "OR (low-high)" label.
aware <- aware %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# Three-decimal p-values; anything that prints as 0.000 is reported as <0.001.
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclustawareurban_men2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(aware, results_treated, by = "X")

# Filler rows: `age` carries the "1 (Reference)" label, `noref` is blank.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert both filler rows at each fixed position, highest position first.
# InsertRow() is defined outside this chunk.
# joint <- InsertRow(joint, NewRow = age, RowNum = 23)
# joint <- InsertRow(joint, NewRow = noref, RowNum = 23)
for (row_pos in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_pos)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_pos)
}

write.csv(joint, "OR aware urban menmen.csv")
 








# Rural men: treated diabetes ----

# Logistic regression via glm.cluster() with cluster = "psu".
multiv_feclusttreated_rural_men <- glm.cluster(
  data = dhs_nomiss_diabetic_only_rural_men,
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  family = binomial(link = "logit")
)

# Exponentiated coefficients and confidence limits, exported as odds ratios.
treated_rural_men <- exp(cbind(
  OR = coef(multiv_feclusttreated_rural_men),
  confint(multiv_feclusttreated_rural_men)
))
write.csv(treated_rural_men, "OR treated rural_men.csv")

# save(file="glm.clustertreatedrural_men2018_03_04",multiv_feclusttreated_rural_men)
results_clusttreatedrural_men <- summary(multiv_feclusttreated_rural_men)
write.csv(results_clusttreatedrural_men, "resultsglmclusttreatedrural_men2018_04_22.csv")

# NOTE(review): the CSVs above are written to the working directory but read
# back from an absolute path - confirm both resolve to the same files.
treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR treated rural_men.csv")
names(treated)[names(treated) == "X2.5.."] <- "lowci"
names(treated)[names(treated) == "X97.5.."] <- "uppci"

# Two-decimal strings, combined into an "OR (low-high)" label.
treated <- treated %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# Three-decimal p-values; anything that prints as 0.000 is reported as <0.001.
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclusttreatedrural_men2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(treated, results_treated, by = "X")

# Filler rows: `age` carries the "1 (Reference)" label, `noref` is blank.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert both filler rows at each fixed position, highest position first.
# InsertRow() is defined outside this chunk.
# joint <- InsertRow(joint, NewRow = age, RowNum = 23)
# joint <- InsertRow(joint, NewRow = noref, RowNum = 23)
for (row_pos in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_pos)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_pos)
}

write.csv(joint, "OR treated rural menmen.csv")
 

# Urban men: treated diabetes ----

# Logistic regression via glm.cluster() with cluster = "psu".
multiv_feclusttreated_urban_men <- glm.cluster(
  data = dhs_nomiss_diabetic_only_urban_men,
  formula = diabetic_treated_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  family = binomial(link = "logit")
)

# Exponentiated coefficients and confidence limits, exported as odds ratios.
treated_urban_men <- exp(cbind(
  OR = coef(multiv_feclusttreated_urban_men),
  confint(multiv_feclusttreated_urban_men)
))
write.csv(treated_urban_men, "OR treated urban_men.csv")

# save(file="glm.clustertreatedurban_men2018_03_04",multiv_feclusttreated_urban_men)
results_clusttreatedurban_men <- summary(multiv_feclusttreated_urban_men)
write.csv(results_clusttreatedurban_men, "resultsglmclusttreatedurban_men2018_04_22.csv")

# NOTE(review): the CSVs above are written to the working directory but read
# back from an absolute path - confirm both resolve to the same files.
treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR treated urban_men.csv")
names(treated)[names(treated) == "X2.5.."] <- "lowci"
names(treated)[names(treated) == "X97.5.."] <- "uppci"

# Two-decimal strings, combined into an "OR (low-high)" label.
treated <- treated %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# Three-decimal p-values; anything that prints as 0.000 is reported as <0.001.
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclusttreatedurban_men2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(treated, results_treated, by = "X")

# Filler rows: `age` carries the "1 (Reference)" label, `noref` is blank.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert both filler rows at each fixed position, highest position first.
# InsertRow() is defined outside this chunk.
# joint <- InsertRow(joint, NewRow = age, RowNum = 23)
# joint <- InsertRow(joint, NewRow = noref, RowNum = 23)
for (row_pos in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_pos)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_pos)
}

write.csv(joint, "OR treated urban menmen.csv")
 


# Rural men: controlled diabetes ----

# Logistic regression via glm.cluster() with cluster = "psu".
multiv_feclustcontrolled_rural_men <- glm.cluster(
  data = dhs_nomiss_diabetic_only_rural_men,
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  family = binomial(link = "logit")
)

# Exponentiated coefficients and confidence limits, exported as odds ratios.
controlled_rural_men <- exp(cbind(
  OR = coef(multiv_feclustcontrolled_rural_men),
  confint(multiv_feclustcontrolled_rural_men)
))
write.csv(controlled_rural_men, "OR controlled rural_men.csv")

# save(file="glm.clustercontrolledrural_men2018_03_04",multiv_feclustcontrolled_rural_men)
results_clustcontrolledrural_men <- summary(multiv_feclustcontrolled_rural_men)
write.csv(results_clustcontrolledrural_men, "resultsglmclustcontrolledrural_men2018_04_22.csv")

# NOTE(review): the CSVs above are written to the working directory but read
# back from an absolute path - confirm both resolve to the same files.
controlled <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR controlled rural_men.csv")
names(controlled)[names(controlled) == "X2.5.."] <- "lowci"
names(controlled)[names(controlled) == "X97.5.."] <- "uppci"

# Two-decimal strings, combined into an "OR (low-high)" label.
controlled <- controlled %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# Three-decimal p-values; anything that prints as 0.000 is reported as <0.001.
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclustcontrolledrural_men2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(controlled, results_treated, by = "X")

# Filler rows: `age` carries the "1 (Reference)" label, `noref` is blank.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert both filler rows at each fixed position, highest position first.
# InsertRow() is defined outside this chunk.
# joint <- InsertRow(joint, NewRow = age, RowNum = 23)
# joint <- InsertRow(joint, NewRow = noref, RowNum = 23)
for (row_pos in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_pos)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_pos)
}

write.csv(joint, "OR controlled rural menmen.csv")
 




# Urban men: controlled diabetes ----

# Logistic regression via glm.cluster() with cluster = "psu".
multiv_feclustcontrolled_urban_men <- glm.cluster(
  data = dhs_nomiss_diabetic_only_urban_men,
  formula = diabetic_controlled_dbl ~ rcs(age, 5) + wealth_quintile_rurb +
    educatnames + married + d_id,
  cluster = "psu",
  family = binomial(link = "logit")
)

# Exponentiated coefficients and confidence limits, exported as odds ratios.
controlled_urban_men <- exp(cbind(
  OR = coef(multiv_feclustcontrolled_urban_men),
  confint(multiv_feclustcontrolled_urban_men)
))
write.csv(controlled_urban_men, "OR controlled urban_men.csv")

# save(file="glm.clustercontrolledurban_men2018_03_04",multiv_feclustcontrolled_urban_men)
results_clustcontrolledurban_men <- summary(multiv_feclustcontrolled_urban_men)
write.csv(results_clustcontrolledurban_men, "resultsglmclustcontrolledurban_men2018_04_22.csv")

# NOTE(review): the CSVs above are written to the working directory but read
# back from an absolute path - confirm both resolve to the same files.
controlled <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/OR controlled urban_men.csv")
names(controlled)[names(controlled) == "X2.5.."] <- "lowci"
names(controlled)[names(controlled) == "X97.5.."] <- "uppci"

# Two-decimal strings, combined into an "OR (low-high)" label.
controlled <- controlled %>%
  mutate(
    lowci = sprintf("%.2f", round(lowci, 2)),
    OR = sprintf("%.2f", round(OR, 2)),
    uppci = sprintf("%.2f", round(uppci, 2)),
    rr = str_c(OR, " (", lowci, "-", uppci, ")")
  ) %>%
  dplyr::select(X, rr)

# Three-decimal p-values; anything that prints as 0.000 is reported as <0.001.
results_treated <- read.csv("~/Documents/Public Health Files/Public Health/public health/Diabetes_cascades_nomen50to54/resultsglmclustcontrolledurban_men2018_04_22.csv") %>%
  mutate(
    p_Value = sprintf("%.3f", round(Pr...z.., 3)),
    p_Value = ifelse(p_Value == "0.000", "<0.001", p_Value)
  ) %>%
  dplyr::select(X, p_Value)

joint <- left_join(controlled, results_treated, by = "X")

# Filler rows: `age` carries the "1 (Reference)" label, `noref` is blank.
age <- c(" ", "1 (Reference)", " ")
noref <- c(" ", " ", " ")

# Insert both filler rows at each fixed position, highest position first.
# InsertRow() is defined outside this chunk.
# joint <- InsertRow(joint, NewRow = age, RowNum = 23)
# joint <- InsertRow(joint, NewRow = noref, RowNum = 23)
for (row_pos in c(21, 13, 9, 2)) {
  joint <- InsertRow(joint, NewRow = age, RowNum = row_pos)
  joint <- InsertRow(joint, NewRow = noref, RowNum = row_pos)
}

write.csv(joint, "OR controlled urban menmen.csv")
 









```

```{r regression figures education}



# Regression figure: awareness by education ----

# Rows with the outcome and every listed covariate observed.
dhs_nomiss_diabetic_only_aware_regress <- dhs_nomiss_diabetic_only %>%
  filter(
    !is.na(diabetic_aware_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
    !is.na(wealth_quintile_rurb), !is.na(educat), !is.na(urban), !is.na(d_id)
  )

# Attach response-scale fitted values and their standard errors from
# multiv_feglmaware, then drop the remaining augment() diagnostics.
dhs_nomiss_diabetic_only_aware_regress <-
  augment(multiv_feglmaware, dhs_nomiss_diabetic_only_aware_regress,
          type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted, se_diab = .se.fit) %>%
  dplyr::select(-.resid, -.hat, -.sigma, -.cooksd, -.std.resid, -.fitted, -.se.fit)

# Plotting dataset: cell means of the fitted probability and of its standard
# error by age group, education and urban/rural label.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + educatnames + urban_lab,
  data = dhs_nomiss_diabetic_only_aware_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + educatnames + urban_lab,
  data = dhs_nomiss_diabetic_only_aware_regress,
  FUN = mean
)

# Limits are mean probability +/- 1.96 times the mean standard error.
fig_aware <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "educatnames", "urban_lab")) %>%
  mutate(
    lowerci = prob_diab - 1.96 * se_diab,
    upperci = prob_diab + 1.96 * se_diab
  )

write.csv(fig_aware, "regressionfigure_aware_educat htn 3 28.csv")

# Keep the limits only for "Highschool unfinished" and "Highschool or above";
# every other education category gets NA, so no error bar is drawn for it.
fig_aware <- mutate(
  fig_aware,
  lowerci_alt = ifelse(
    educatnames == "Highschool unfinished" | educatnames == "Highschool or above",
    lowerci, NA
  ),
  upperci_alt = ifelse(
    educatnames == "Highschool unfinished" | educatnames == "Highschool or above",
    upperci, NA
  ),
  alphaindic = as.factor(ifelse(
    educatnames == "Highschool unfinished" | educatnames == "Highschool or above",
    1, 0
  ))
)


# Draw the actual figure
# Return colour(s) with the hue and saturation of `rgbcol` and the HSV value
# component replaced by `v`.
#   rgbcol: colour specification accepted by col2rgb()
#   v:      value (brightness) passed to hsv(); may be a vector
brightness <- function(rgbcol, v) {
  hue_sat <- rgb2hsv(col2rgb(rgbcol))
  hsv(h = as.vector(hue_sat["h", ]), s = as.vector(hue_sat["s", ]), v = v)
}

# Points: mean fitted probability (in %) per age group, coloured and shaped by
# education; error bars use the *_alt limits; one facet per urban_lab value.
diabpredfig <- ggplot(fig_aware) +
  stat_summary(
    aes(x = age_grpOR, y = 100 * prob_diab, color = educatnames, shape = educatnames),
    fun.y = "mean", geom = "point", size = 3.0
  ) +
  geom_errorbar(
    aes(x = age_grpOR, ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, color = educatnames),
    width = 0.3, show.legend = FALSE
  ) +
  facet_wrap(~urban_lab) +
  coord_fixed(ratio = 7 / 50, ylim = c(15, 70), expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y", y = "Probability, %", fill = "") +
  theme(
    axis.text.y = element_text(family = "Times", size = 18),
    axis.text.x = element_text(family = "Times", size = 18, angle = 45, hjust = 1),
    axis.title = element_text(family = "Times", size = 21, face = "bold"),
    legend.title = element_text(family = "Times", size = 18),
    legend.text = element_text(family = "Times", size = 18),
    strip.text = element_text(family = "Times", size = 18),
    panel.spacing = unit(2.5, "lines"),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    plot.title = element_text(family = "Times", size = 24, face = "bold")
  ) +
  # Five shades of red, from value 0 to value 1.
  scale_colour_manual(
    name = "Education",
    values = brightness("red", seq(0.0, 1.0, length.out = 5))
  ) +
  scale_shape_manual(name = "Education", values = c(19, 25, 22, 23, 17))
diabpredfig

# Copy the current device to a PDF file and close that PDF device.
dev.copy(pdf, "reg aware education htn 3 28.pdf")
dev.off()


# Regression figure: treatment by education ----

# Rows with the outcome and every listed covariate observed.
dhs_nomiss_diabetic_only_treated_regress <- dhs_nomiss_diabetic_only %>%
  filter(
    !is.na(diabetic_treated_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
    !is.na(wealth_quintile_rurb), !is.na(educat), !is.na(urban), !is.na(d_id)
  )

# Attach response-scale fitted values and their standard errors from
# multiv_feglmtreated, then drop the remaining augment() diagnostics.
dhs_nomiss_diabetic_only_treated_regress <-
  augment(multiv_feglmtreated, dhs_nomiss_diabetic_only_treated_regress,
          type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted, se_diab = .se.fit) %>%
  dplyr::select(-.resid, -.hat, -.sigma, -.cooksd, -.std.resid, -.fitted, -.se.fit)

# Plotting dataset: cell means of the fitted probability and of its standard
# error by age group, education and urban/rural label.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + educatnames + urban_lab,
  data = dhs_nomiss_diabetic_only_treated_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + educatnames + urban_lab,
  data = dhs_nomiss_diabetic_only_treated_regress,
  FUN = mean
)

# Limits are mean probability +/- 1.96 times the mean standard error.
fig_treated <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "educatnames", "urban_lab")) %>%
  mutate(
    lowerci = prob_diab - 1.96 * se_diab,
    upperci = prob_diab + 1.96 * se_diab
  )

write.csv(fig_treated, "regressionfigure_treated educat htn 3 28.csv")

# Keep the limits only where educatnames equals "x"; all other rows get NA.
# NOTE(review): "x" looks like a placeholder - if no education category is
# named "x", no error bars are drawn at all. Confirm this is intended.
fig_treated <- mutate(
  fig_treated,
  lowerci_alt = ifelse(educatnames == "x", lowerci, NA),
  upperci_alt = ifelse(educatnames == "x", upperci, NA),
  alphaindic = as.factor(ifelse(educatnames == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) with the hue and saturation of `rgbcol` and the HSV value
# component replaced by `v`.
#   rgbcol: colour specification accepted by col2rgb()
#   v:      value (brightness) passed to hsv(); may be a vector
brightness <- function(rgbcol, v) {
  hue_sat <- rgb2hsv(col2rgb(rgbcol))
  hsv(h = as.vector(hue_sat["h", ]), s = as.vector(hue_sat["s", ]), v = v)
}

# Points: mean fitted probability (in %) per age group, coloured and shaped by
# education; error bars use the *_alt limits; one facet per urban_lab value.
diabpredfig <- ggplot(fig_treated) +
  stat_summary(
    aes(x = age_grpOR, y = 100 * prob_diab, color = educatnames, shape = educatnames),
    fun.y = "mean", geom = "point", size = 3.0
  ) +
  geom_errorbar(
    aes(x = age_grpOR, ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, color = educatnames),
    width = 0.3, show.legend = FALSE
  ) +
  facet_wrap(~urban_lab) +
  coord_fixed(ratio = 7 / 50, ylim = c(14, 70), expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y", y = "Probability, %", fill = "") +
  theme(
    axis.text.y = element_text(family = "Times", size = 18),
    axis.text.x = element_text(family = "Times", size = 18, angle = 45, hjust = 1),
    axis.title = element_text(family = "Times", size = 21, face = "bold"),
    legend.title = element_text(family = "Times", size = 18),
    legend.text = element_text(family = "Times", size = 18),
    strip.text = element_text(family = "Times", size = 18),
    panel.spacing = unit(2.5, "lines"),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    plot.title = element_text(family = "Times", size = 24, face = "bold")
  ) +
  # Five shades of red, from value 0 to value 1.
  scale_colour_manual(
    name = "Education",
    values = brightness("red", seq(0.0, 1.0, length.out = 5))
  ) +
  scale_shape_manual(name = "Education", values = c(19, 25, 22, 23, 17))
diabpredfig

# Copy the current device to a PDF file and close that PDF device.
dev.copy(pdf, "reg treated education htn 3 28.pdf")
dev.off()

# Regression figure: control by education ----

# Rows with the outcome and every listed covariate observed.
dhs_nomiss_diabetic_only_controlled_regress <- dhs_nomiss_diabetic_only %>%
  filter(
    !is.na(diabetic_controlled_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
    !is.na(wealth_quintile_rurb), !is.na(educat), !is.na(urban), !is.na(d_id)
  )

# Attach response-scale fitted values and their standard errors from
# multiv_feglmcontrolled, then drop the remaining augment() diagnostics.
dhs_nomiss_diabetic_only_controlled_regress <-
  augment(multiv_feglmcontrolled, dhs_nomiss_diabetic_only_controlled_regress,
          type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted, se_diab = .se.fit) %>%
  dplyr::select(-.resid, -.hat, -.sigma, -.cooksd, -.std.resid, -.fitted, -.se.fit)

# Plotting dataset: cell means of the fitted probability and of its standard
# error by age group, education and urban/rural label.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + educatnames + urban_lab,
  data = dhs_nomiss_diabetic_only_controlled_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + educatnames + urban_lab,
  data = dhs_nomiss_diabetic_only_controlled_regress,
  FUN = mean
)

# Limits are mean probability +/- 1.96 times the mean standard error.
fig_controlled <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "educatnames", "urban_lab")) %>%
  mutate(
    lowerci = prob_diab - 1.96 * se_diab,
    upperci = prob_diab + 1.96 * se_diab
  )

write.csv(fig_controlled, "regressionfigure_controlled educat 3 28.csv")

# Keep the limits only where educatnames equals "x"; all other rows get NA.
# NOTE(review): "x" looks like a placeholder - if no education category is
# named "x", no error bars are drawn at all. Confirm this is intended.
fig_controlled <- mutate(
  fig_controlled,
  lowerci_alt = ifelse(educatnames == "x", lowerci, NA),
  upperci_alt = ifelse(educatnames == "x", upperci, NA),
  alphaindic = as.factor(ifelse(educatnames == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) with the hue and saturation of `rgbcol` and the HSV value
# component replaced by `v`.
#   rgbcol: colour specification accepted by col2rgb()
#   v:      value (brightness) passed to hsv(); may be a vector
brightness <- function(rgbcol, v) {
  hue_sat <- rgb2hsv(col2rgb(rgbcol))
  hsv(h = as.vector(hue_sat["h", ]), s = as.vector(hue_sat["s", ]), v = v)
}

# Points: mean fitted probability (in %) per age group, coloured and shaped by
# education; error bars use the *_alt limits; one facet per urban_lab value.
diabpredfig <- ggplot(fig_controlled) +
  stat_summary(
    aes(x = age_grpOR, y = 100 * prob_diab, color = educatnames, shape = educatnames),
    fun.y = "mean", geom = "point", size = 3.0
  ) +
  geom_errorbar(
    aes(x = age_grpOR, ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, color = educatnames),
    width = 0.3, show.legend = FALSE
  ) +
  facet_wrap(~urban_lab) +
  coord_fixed(ratio = 7 / 50, ylim = c(10, 70), expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y", y = "Probability, %", fill = "") +
  theme(
    axis.text.y = element_text(family = "Times", size = 18),
    axis.text.x = element_text(family = "Times", size = 18, angle = 45, hjust = 1),
    axis.title = element_text(family = "Times", size = 21, face = "bold"),
    legend.title = element_text(family = "Times", size = 18),
    legend.text = element_text(family = "Times", size = 18),
    strip.text = element_text(family = "Times", size = 18),
    panel.spacing = unit(2.5, "lines"),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    plot.title = element_text(family = "Times", size = 24, face = "bold")
  ) +
  # Five shades of red, from value 0 to value 1.
  scale_colour_manual(
    name = "Education",
    values = brightness("red", seq(0.0, 1.0, length.out = 5))
  ) +
  scale_shape_manual(name = "Education", values = c(19, 25, 22, 23, 17))
diabpredfig


# Copy the current device to a PDF file and close that PDF device.
dev.copy(pdf, "reg controlled education htn 3 28.pdf")
dev.off()

```




```{r regression figure S3 wealth }




# Regression figure: awareness by household wealth quintile ----

# Rows with the outcome and every listed covariate observed.
dhs_nomiss_diabetic_only_aware_regress <- dhs_nomiss_diabetic_only %>%
  filter(
    !is.na(diabetic_aware_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
    !is.na(wealth_quintile_rurb_lab), !is.na(educat), !is.na(urban), !is.na(d_id)
  )

# Attach response-scale fitted values and their standard errors from
# multiv_feglmaware, then drop the remaining augment() diagnostics.
dhs_nomiss_diabetic_only_aware_regress <-
  augment(multiv_feglmaware, dhs_nomiss_diabetic_only_aware_regress,
          type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted, se_diab = .se.fit) %>%
  dplyr::select(-.resid, -.hat, -.sigma, -.cooksd, -.std.resid, -.fitted, -.se.fit)

# Plotting dataset: cell means of the fitted probability and of its standard
# error by age group, wealth quintile label and urban/rural label.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_aware_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_aware_regress,
  FUN = mean
)

# Limits are mean probability +/- 1.96 times the mean standard error.
fig_aware <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "wealth_quintile_rurb_lab", "urban_lab")) %>%
  mutate(
    lowerci = prob_diab - 1.96 * se_diab,
    upperci = prob_diab + 1.96 * se_diab
  )

write.csv(fig_aware, "regressionfigure_aware_wealth women 3 28.csv")

# Keep the limits only where wealth_quintile_rurb_lab equals "x"; all other
# rows get NA.
# NOTE(review): "x" looks like a placeholder - if no quintile label is "x",
# no error bars are drawn at all. Confirm this is intended.
fig_aware <- mutate(
  fig_aware,
  lowerci_alt = ifelse(wealth_quintile_rurb_lab == "x", lowerci, NA),
  upperci_alt = ifelse(wealth_quintile_rurb_lab == "x", upperci, NA),
  alphaindic = as.factor(ifelse(wealth_quintile_rurb_lab == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) with the hue and saturation of `rgbcol` and the HSV value
# component replaced by `v`.
#   rgbcol: colour specification accepted by col2rgb()
#   v:      value (brightness) passed to hsv(); may be a vector
brightness <- function(rgbcol, v) {
  hue_sat <- rgb2hsv(col2rgb(rgbcol))
  hsv(h = as.vector(hue_sat["h", ]), s = as.vector(hue_sat["s", ]), v = v)
}

# Points: mean fitted probability (in %) per age group, coloured and shaped by
# wealth quintile; error bars use the *_alt limits; one facet per urban_lab value.
diabpredfig <- ggplot(fig_aware) +
  stat_summary(
    aes(
      x = age_grpOR, y = 100 * prob_diab,
      color = wealth_quintile_rurb_lab, shape = wealth_quintile_rurb_lab
    ),
    fun.y = "mean", geom = "point", size = 3.0
  ) +
  geom_errorbar(
    aes(
      x = age_grpOR, ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt,
      color = wealth_quintile_rurb_lab
    ),
    width = 0.3, show.legend = FALSE
  ) +
  facet_wrap(~urban_lab) +
  coord_fixed(ratio = 6 / 80, ylim = c(0, 80), expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y", y = "Probability, %", fill = "") +
  theme(
    axis.text.y = element_text(family = "Times", size = 18),
    axis.text.x = element_text(family = "Times", size = 18, angle = 45, hjust = 1),
    axis.title = element_text(family = "Times", size = 21, face = "bold"),
    legend.title = element_text(family = "Times", size = 18),
    legend.text = element_text(family = "Times", size = 18),
    strip.text = element_text(family = "Times", size = 18),
    panel.spacing = unit(2.5, "lines"),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    plot.title = element_text(family = "Times", size = 24, face = "bold")
  ) +
  # Five shades of red, from value 0 to value 1.
  scale_colour_manual(
    name = "Household Wealth Quintile",
    values = brightness("red", seq(0.0, 1.0, length.out = 5))
  ) +
  scale_shape_manual(name = "Household Wealth Quintile", values = c(19, 25, 22, 23, 17))
diabpredfig

# PDF export is currently disabled for this figure.
#dev.copy(pdf,'reg aware wealth 3 28.pdf')
#dev.off()


#Regression figure for treated wealth#




# Treated outcome: keep rows with the outcome and every listed covariate
# observed, attach response-scale predictions and their standard errors from
# multiv_feglmtreated, and drop the remaining augment() diagnostics.
# NOTE(review): the column drop assumes augment() returns .resid, .hat,
# .sigma, .cooksd, .std.resid, .fitted and .se.fit -- confirm against the
# installed broom version.
dhs_nomiss_diabetic_only_treated_regress <- dhs_nomiss_diabetic_only %>%
  filter(!is.na(diabetic_treated_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
         !is.na(wealth_quintile_rurb), !is.na(educat), !is.na(urban), !is.na(d_id)) %>%
  augment(multiv_feglmtreated, ., type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted,
         se_diab = .se.fit) %>%
  dplyr::select(-c(.resid, .hat, .sigma, .cooksd, .std.resid, .fitted, .se.fit))


# Plotting table: mean predicted probability and mean standard error for each
# age group x wealth quintile x urban_lab cell, plus +/- 1.96 * SE limits.
# NOTE(review): the limits use the mean of per-row standard errors, which is
# not the standard error of the cell mean -- confirm this is intended.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_treated_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_treated_regress,
  FUN = mean
)
fig_treated <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "wealth_quintile_rurb_lab", "urban_lab")) %>%
  mutate(lowerci = prob_diab - (1.96 * se_diab),
         upperci = prob_diab + (1.96 * se_diab))

# NOTE(review): the file name says "women" but the input above is
# dhs_nomiss_diabetic_only, and a later chunk writes its women-only table to
# the same file name -- confirm which table this file should hold.
write.csv(fig_treated, file = "regressionfigure_treated wealth women  3 28.csv")


# Error-bar limits are kept only for rows whose wealth label equals "x"; every
# other row gets NA limits. alphaindic is a 1/0 factor flagging the same rows.
# NOTE(review): "x" looks like a placeholder for the quintile labels whose
# error bars should stay visible -- confirm the real labels.
fig_treated <- mutate(
  fig_treated,
  lowerci_alt = ifelse(wealth_quintile_rurb_lab == "x", lowerci, NA),
  upperci_alt = ifelse(wealth_quintile_rurb_lab == "x", upperci, NA),
  alphaindic = as.factor(ifelse(wealth_quintile_rurb_lab == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) `rgbcol` with the HSV value (brightness) channel replaced
# by `v`, as hex colour strings. Hue and saturation are left unchanged.
brightness <- function(rgbcol, v) {
  hsv_channels <- as.data.frame(t(rgb2hsv(col2rgb(rgbcol))))
  hsv_args <- as.list(hsv_channels)
  # Overwrite the "v" channel, then hand h/s/v to hsv() as named arguments.
  hsv_args[["v"]] <- v
  do.call(hsv, hsv_args)
}

# Figure: mean predicted probability (in %) by age group, one point per wealth
# quintile (colour and shape), one panel per urban_lab level. Error bars are
# drawn only where lowerci_alt / upperci_alt are not NA.
# `fun` replaces the `fun.y` argument of stat_summary(), which ggplot2
# deprecated in version 3.3.0.
diabpredfig <- fig_treated %>%
  ggplot() +
  stat_summary(aes(y = 100 * prob_diab, x = age_grpOR,
                   color = wealth_quintile_rurb_lab, shape = wealth_quintile_rurb_lab),
               fun = "mean", size = 3.0, geom = "point") +
  geom_errorbar(aes(ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, x = age_grpOR,
                    color = wealth_quintile_rurb_lab),
                width = 0.3, show.legend = FALSE) +
  facet_wrap(~urban_lab) +
  # Fixed aspect ratio; the probability axis is limited to 0-80 %.
  coord_fixed(ylim = c(0, 80), ratio = 6 / 80, expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y",
       y = "Probability, %",
       fill = "") +
  theme(axis.text.y = element_text(size = 18, family = "Times"),
        axis.text.x = element_text(size = 18, angle = 45, hjust = 1, family = "Times"),
        axis.title = element_text(size = 21, face = "bold", family = "Times"),
        legend.title = element_text(size = 18, family = "Times"),
        legend.text = element_text(size = 18, family = "Times"),
        strip.text = element_text(size = 18, family = "Times"),
        panel.spacing = unit(2.5, "lines"),
        axis.title.x = element_text(margin = margin(t = 20)),
        axis.title.y = element_text(margin = margin(r = 20)),
        plot.title = element_text(size = 24, face = "bold", family = "Times")) +
  # Five shades of red, from v = 0 (black) to v = 1 (full brightness).
  scale_colour_manual(values = brightness("red", seq(0.0, 1.0, length.out = 5)),
                      name = "Household Wealth Quintile") +
  scale_shape_manual(values = c(19, 25, 22, 23, 17), name = "Household Wealth Quintile")
diabpredfig

#dev.copy(pdf,'reg treated wealth  3 28.pdf')
#dev.off()
#
#Regression figure for controlled wealth quintile #



# Controlled outcome: keep rows with the outcome and every listed covariate
# observed, attach response-scale predictions and their standard errors from
# multiv_feglmcontrolled, and drop the remaining augment() diagnostics.
# NOTE(review): the column drop assumes augment() returns .resid, .hat,
# .sigma, .cooksd, .std.resid, .fitted and .se.fit -- confirm against the
# installed broom version.
dhs_nomiss_diabetic_only_controlled_regress <- dhs_nomiss_diabetic_only %>%
  filter(!is.na(diabetic_controlled_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
         !is.na(wealth_quintile_rurb), !is.na(educat), !is.na(urban), !is.na(d_id)) %>%
  augment(multiv_feglmcontrolled, ., type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted,
         se_diab = .se.fit) %>%
  dplyr::select(-c(.resid, .hat, .sigma, .cooksd, .std.resid, .fitted, .se.fit))


# Plotting table: mean predicted probability and mean standard error for each
# age group x wealth quintile x urban_lab cell, plus +/- 1.96 * SE limits.
# NOTE(review): the limits use the mean of per-row standard errors, which is
# not the standard error of the cell mean -- confirm this is intended.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_controlled_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_controlled_regress,
  FUN = mean
)
fig_controlled <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "wealth_quintile_rurb_lab", "urban_lab")) %>%
  mutate(lowerci = prob_diab - (1.96 * se_diab),
         upperci = prob_diab + (1.96 * se_diab))

# NOTE(review): the file name says "women" but the input above is
# dhs_nomiss_diabetic_only, and a later chunk writes its women-only table to
# the same file name -- confirm which table this file should hold.
write.csv(fig_controlled, file = "regressionfigure_controlled wealth women 3 28.csv")


# Error-bar limits are kept only for rows whose wealth label equals "x"; every
# other row gets NA limits. alphaindic is a 1/0 factor flagging the same rows.
# NOTE(review): "x" looks like a placeholder for the quintile labels whose
# error bars should stay visible -- confirm the real labels.
fig_controlled <- mutate(
  fig_controlled,
  lowerci_alt = ifelse(wealth_quintile_rurb_lab == "x", lowerci, NA),
  upperci_alt = ifelse(wealth_quintile_rurb_lab == "x", upperci, NA),
  alphaindic = as.factor(ifelse(wealth_quintile_rurb_lab == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) `rgbcol` with the HSV value (brightness) channel replaced
# by `v`, as hex colour strings. Hue and saturation are left unchanged.
brightness <- function(rgbcol, v) {
  hsv_channels <- as.data.frame(t(rgb2hsv(col2rgb(rgbcol))))
  hsv_args <- as.list(hsv_channels)
  # Overwrite the "v" channel, then hand h/s/v to hsv() as named arguments.
  hsv_args[["v"]] <- v
  do.call(hsv, hsv_args)
}

# Figure: mean predicted probability (in %) by age group, one point per wealth
# quintile (colour and shape), one panel per urban_lab level. Error bars are
# drawn only where lowerci_alt / upperci_alt are not NA.
# `fun` replaces the `fun.y` argument of stat_summary(), which ggplot2
# deprecated in version 3.3.0.
diabpredfig <- fig_controlled %>%
  ggplot() +
  stat_summary(aes(y = 100 * prob_diab, x = age_grpOR,
                   color = wealth_quintile_rurb_lab, shape = wealth_quintile_rurb_lab),
               fun = "mean", size = 3.0, geom = "point") +
  geom_errorbar(aes(ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, x = age_grpOR,
                    color = wealth_quintile_rurb_lab),
                width = 0.3, show.legend = FALSE) +
  facet_wrap(~urban_lab) +
  # Fixed aspect ratio; the probability axis is limited to 0-80 %.
  coord_fixed(ylim = c(0, 80), ratio = 6 / 80, expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y",
       y = "Probability, %",
       fill = "") +
  theme(axis.text.y = element_text(size = 18, family = "Times"),
        axis.text.x = element_text(size = 18, angle = 45, hjust = 1, family = "Times"),
        axis.title = element_text(size = 21, face = "bold", family = "Times"),
        legend.title = element_text(size = 18, family = "Times"),
        legend.text = element_text(size = 18, family = "Times"),
        strip.text = element_text(size = 18, family = "Times"),
        panel.spacing = unit(2.5, "lines"),
        axis.title.x = element_text(margin = margin(t = 20)),
        axis.title.y = element_text(margin = margin(r = 20)),
        plot.title = element_text(size = 24, face = "bold", family = "Times")) +
  # Five shades of red, from v = 0 (black) to v = 1 (full brightness).
  scale_colour_manual(values = brightness("red", seq(0.0, 1.0, length.out = 5)),
                      name = "Household Wealth Quintile") +
  scale_shape_manual(values = c(19, 25, 22, 23, 17), name = "Household Wealth Quintile")
diabpredfig


# Copy whatever is on the active graphics device into a PDF, then close it.
# NOTE(review): the sex-specific chunks later in this file save to the same
# PDF name -- confirm this file is not meant to survive those chunks.
dev.copy(pdf, 'reg controlled wealth 3 28.pdf')
dev.off()




```




```{r regression figure S3 wealth women }




#Regression figure for aware wealth quintile#

# Aware outcome, women: keep rows with the outcome and every listed covariate
# observed, attach response-scale predictions and their standard errors from
# multiv_feglmaware, and drop the remaining augment() diagnostics.
# NOTE(review): the sex-specific subset is passed as augment()'s data for a
# model object that is also used with other subsets in this file -- confirm
# the model was fitted on exactly these rows.
dhs_nomiss_diabetic_only_aware_regress <- dhs_nomiss_diabetic_only_women %>%
  filter(!is.na(diabetic_aware_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
         !is.na(wealth_quintile_rurb_lab), !is.na(educat), !is.na(urban), !is.na(d_id)) %>%
  augment(multiv_feglmaware, ., type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted,
         se_diab = .se.fit) %>%
  dplyr::select(-c(.resid, .hat, .sigma, .cooksd, .std.resid, .fitted, .se.fit))


# Plotting table: mean predicted probability and mean standard error for each
# age group x wealth quintile x urban_lab cell, plus +/- 1.96 * SE limits.
# NOTE(review): the limits use the mean of per-row standard errors, which is
# not the standard error of the cell mean -- confirm this is intended.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_aware_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_aware_regress,
  FUN = mean
)
fig_aware <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "wealth_quintile_rurb_lab", "urban_lab")) %>%
  mutate(lowerci = prob_diab - (1.96 * se_diab),
         upperci = prob_diab + (1.96 * se_diab))


write.csv(fig_aware, file = "regressionfigure_aware_wealth women 3 28.csv")


# Error-bar limits are kept only for rows whose wealth label equals "x"; every
# other row gets NA limits. alphaindic is a 1/0 factor flagging the same rows.
# NOTE(review): "x" looks like a placeholder for the quintile labels whose
# error bars should stay visible -- confirm the real labels.
fig_aware <- mutate(
  fig_aware,
  lowerci_alt = ifelse(wealth_quintile_rurb_lab == "x", lowerci, NA),
  upperci_alt = ifelse(wealth_quintile_rurb_lab == "x", upperci, NA),
  alphaindic = as.factor(ifelse(wealth_quintile_rurb_lab == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) `rgbcol` with the HSV value (brightness) channel replaced
# by `v`, as hex colour strings. Hue and saturation are left unchanged.
brightness <- function(rgbcol, v) {
  hsv_channels <- as.data.frame(t(rgb2hsv(col2rgb(rgbcol))))
  hsv_args <- as.list(hsv_channels)
  # Overwrite the "v" channel, then hand h/s/v to hsv() as named arguments.
  hsv_args[["v"]] <- v
  do.call(hsv, hsv_args)
}

# Figure: mean predicted probability (in %) by age group, one point per wealth
# quintile (colour and shape), one panel per urban_lab level. Error bars are
# drawn only where lowerci_alt / upperci_alt are not NA.
# `fun` replaces the `fun.y` argument of stat_summary(), which ggplot2
# deprecated in version 3.3.0.
diabpredfig <- fig_aware %>%
  ggplot() +
  stat_summary(aes(y = 100 * prob_diab, x = age_grpOR,
                   color = wealth_quintile_rurb_lab, shape = wealth_quintile_rurb_lab),
               fun = "mean", size = 3.0, geom = "point") +
  geom_errorbar(aes(ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, x = age_grpOR,
                    color = wealth_quintile_rurb_lab),
                width = 0.3, show.legend = FALSE) +
  facet_wrap(~urban_lab) +
  # Fixed aspect ratio; the probability axis is limited to 0-80 %.
  coord_fixed(ylim = c(0, 80), ratio = 6 / 80, expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y",
       y = "Probability, %",
       fill = "") +
  theme(axis.text.y = element_text(size = 18, family = "Times"),
        axis.text.x = element_text(size = 18, angle = 45, hjust = 1, family = "Times"),
        axis.title = element_text(size = 21, face = "bold", family = "Times"),
        legend.title = element_text(size = 18, family = "Times"),
        legend.text = element_text(size = 18, family = "Times"),
        strip.text = element_text(size = 18, family = "Times"),
        panel.spacing = unit(2.5, "lines"),
        axis.title.x = element_text(margin = margin(t = 20)),
        axis.title.y = element_text(margin = margin(r = 20)),
        plot.title = element_text(size = 24, face = "bold", family = "Times")) +
  # Five shades of red, from v = 0 (black) to v = 1 (full brightness).
  scale_colour_manual(values = brightness("red", seq(0.0, 1.0, length.out = 5)),
                      name = "Household Wealth Quintile") +
  scale_shape_manual(values = c(19, 25, 22, 23, 17), name = "Household Wealth Quintile")
diabpredfig

#dev.copy(pdf,'reg aware wealth 3 28.pdf')
#dev.off()


#Regression figure for treated wealth#




# Treated outcome, women: keep rows with the outcome and every listed
# covariate observed, attach response-scale predictions and their standard
# errors from multiv_feglmtreated, and drop the remaining augment() diagnostics.
# NOTE(review): the sex-specific subset is passed as augment()'s data for a
# model object that is also used with other subsets in this file -- confirm
# the model was fitted on exactly these rows.
dhs_nomiss_diabetic_only_treated_regress <- dhs_nomiss_diabetic_only_women %>%
  filter(!is.na(diabetic_treated_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
         !is.na(wealth_quintile_rurb), !is.na(educat), !is.na(urban), !is.na(d_id)) %>%
  augment(multiv_feglmtreated, ., type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted,
         se_diab = .se.fit) %>%
  dplyr::select(-c(.resid, .hat, .sigma, .cooksd, .std.resid, .fitted, .se.fit))


# Plotting table: mean predicted probability and mean standard error for each
# age group x wealth quintile x urban_lab cell, plus +/- 1.96 * SE limits.
# NOTE(review): the limits use the mean of per-row standard errors, which is
# not the standard error of the cell mean -- confirm this is intended.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_treated_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_treated_regress,
  FUN = mean
)
fig_treated <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "wealth_quintile_rurb_lab", "urban_lab")) %>%
  mutate(lowerci = prob_diab - (1.96 * se_diab),
         upperci = prob_diab + (1.96 * se_diab))

write.csv(fig_treated, file = "regressionfigure_treated wealth women  3 28.csv")


# Error-bar limits are kept only for rows whose wealth label equals "x"; every
# other row gets NA limits. alphaindic is a 1/0 factor flagging the same rows.
# NOTE(review): "x" looks like a placeholder for the quintile labels whose
# error bars should stay visible -- confirm the real labels.
fig_treated <- mutate(
  fig_treated,
  lowerci_alt = ifelse(wealth_quintile_rurb_lab == "x", lowerci, NA),
  upperci_alt = ifelse(wealth_quintile_rurb_lab == "x", upperci, NA),
  alphaindic = as.factor(ifelse(wealth_quintile_rurb_lab == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) `rgbcol` with the HSV value (brightness) channel replaced
# by `v`, as hex colour strings. Hue and saturation are left unchanged.
brightness <- function(rgbcol, v) {
  hsv_channels <- as.data.frame(t(rgb2hsv(col2rgb(rgbcol))))
  hsv_args <- as.list(hsv_channels)
  # Overwrite the "v" channel, then hand h/s/v to hsv() as named arguments.
  hsv_args[["v"]] <- v
  do.call(hsv, hsv_args)
}

# Figure: mean predicted probability (in %) by age group, one point per wealth
# quintile (colour and shape), one panel per urban_lab level. Error bars are
# drawn only where lowerci_alt / upperci_alt are not NA.
# `fun` replaces the `fun.y` argument of stat_summary(), which ggplot2
# deprecated in version 3.3.0.
diabpredfig <- fig_treated %>%
  ggplot() +
  stat_summary(aes(y = 100 * prob_diab, x = age_grpOR,
                   color = wealth_quintile_rurb_lab, shape = wealth_quintile_rurb_lab),
               fun = "mean", size = 3.0, geom = "point") +
  geom_errorbar(aes(ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, x = age_grpOR,
                    color = wealth_quintile_rurb_lab),
                width = 0.3, show.legend = FALSE) +
  facet_wrap(~urban_lab) +
  # Fixed aspect ratio; the probability axis is limited to 0-80 %.
  coord_fixed(ylim = c(0, 80), ratio = 6 / 80, expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y",
       y = "Probability, %",
       fill = "") +
  theme(axis.text.y = element_text(size = 18, family = "Times"),
        axis.text.x = element_text(size = 18, angle = 45, hjust = 1, family = "Times"),
        axis.title = element_text(size = 21, face = "bold", family = "Times"),
        legend.title = element_text(size = 18, family = "Times"),
        legend.text = element_text(size = 18, family = "Times"),
        strip.text = element_text(size = 18, family = "Times"),
        panel.spacing = unit(2.5, "lines"),
        axis.title.x = element_text(margin = margin(t = 20)),
        axis.title.y = element_text(margin = margin(r = 20)),
        plot.title = element_text(size = 24, face = "bold", family = "Times")) +
  # Five shades of red, from v = 0 (black) to v = 1 (full brightness).
  scale_colour_manual(values = brightness("red", seq(0.0, 1.0, length.out = 5)),
                      name = "Household Wealth Quintile") +
  scale_shape_manual(values = c(19, 25, 22, 23, 17), name = "Household Wealth Quintile")
diabpredfig

#dev.copy(pdf,'reg treated wealth  3 28.pdf')
#dev.off()
#
#Regression figure for controlled wealth quintile #



# Controlled outcome, women: keep rows with the outcome and every listed
# covariate observed, attach response-scale predictions and their standard
# errors from multiv_feglmcontrolled, and drop the remaining augment()
# diagnostics.
# NOTE(review): the sex-specific subset is passed as augment()'s data for a
# model object that is also used with other subsets in this file -- confirm
# the model was fitted on exactly these rows.
dhs_nomiss_diabetic_only_controlled_regress <- dhs_nomiss_diabetic_only_women %>%
  filter(!is.na(diabetic_controlled_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
         !is.na(wealth_quintile_rurb), !is.na(educat), !is.na(urban), !is.na(d_id)) %>%
  augment(multiv_feglmcontrolled, ., type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted,
         se_diab = .se.fit) %>%
  dplyr::select(-c(.resid, .hat, .sigma, .cooksd, .std.resid, .fitted, .se.fit))


# Plotting table: mean predicted probability and mean standard error for each
# age group x wealth quintile x urban_lab cell, plus +/- 1.96 * SE limits.
# NOTE(review): the limits use the mean of per-row standard errors, which is
# not the standard error of the cell mean -- confirm this is intended.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_controlled_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_controlled_regress,
  FUN = mean
)
fig_controlled <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "wealth_quintile_rurb_lab", "urban_lab")) %>%
  mutate(lowerci = prob_diab - (1.96 * se_diab),
         upperci = prob_diab + (1.96 * se_diab))

write.csv(fig_controlled, file = "regressionfigure_controlled wealth women 3 28.csv")


# Error-bar limits are kept only for rows whose wealth label equals "x"; every
# other row gets NA limits. alphaindic is a 1/0 factor flagging the same rows.
# NOTE(review): "x" looks like a placeholder for the quintile labels whose
# error bars should stay visible -- confirm the real labels.
fig_controlled <- mutate(
  fig_controlled,
  lowerci_alt = ifelse(wealth_quintile_rurb_lab == "x", lowerci, NA),
  upperci_alt = ifelse(wealth_quintile_rurb_lab == "x", upperci, NA),
  alphaindic = as.factor(ifelse(wealth_quintile_rurb_lab == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) `rgbcol` with the HSV value (brightness) channel replaced
# by `v`, as hex colour strings. Hue and saturation are left unchanged.
brightness <- function(rgbcol, v) {
  hsv_channels <- as.data.frame(t(rgb2hsv(col2rgb(rgbcol))))
  hsv_args <- as.list(hsv_channels)
  # Overwrite the "v" channel, then hand h/s/v to hsv() as named arguments.
  hsv_args[["v"]] <- v
  do.call(hsv, hsv_args)
}

# Figure: mean predicted probability (in %) by age group, one point per wealth
# quintile (colour and shape), one panel per urban_lab level. Error bars are
# drawn only where lowerci_alt / upperci_alt are not NA.
# `fun` replaces the `fun.y` argument of stat_summary(), which ggplot2
# deprecated in version 3.3.0.
diabpredfig <- fig_controlled %>%
  ggplot() +
  stat_summary(aes(y = 100 * prob_diab, x = age_grpOR,
                   color = wealth_quintile_rurb_lab, shape = wealth_quintile_rurb_lab),
               fun = "mean", size = 3.0, geom = "point") +
  geom_errorbar(aes(ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, x = age_grpOR,
                    color = wealth_quintile_rurb_lab),
                width = 0.3, show.legend = FALSE) +
  facet_wrap(~urban_lab) +
  # Fixed aspect ratio; the probability axis is limited to 0-80 %.
  coord_fixed(ylim = c(0, 80), ratio = 6 / 80, expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y",
       y = "Probability, %",
       fill = "") +
  theme(axis.text.y = element_text(size = 18, family = "Times"),
        axis.text.x = element_text(size = 18, angle = 45, hjust = 1, family = "Times"),
        axis.title = element_text(size = 21, face = "bold", family = "Times"),
        legend.title = element_text(size = 18, family = "Times"),
        legend.text = element_text(size = 18, family = "Times"),
        strip.text = element_text(size = 18, family = "Times"),
        panel.spacing = unit(2.5, "lines"),
        axis.title.x = element_text(margin = margin(t = 20)),
        axis.title.y = element_text(margin = margin(r = 20)),
        plot.title = element_text(size = 24, face = "bold", family = "Times")) +
  # Five shades of red, from v = 0 (black) to v = 1 (full brightness).
  scale_colour_manual(values = brightness("red", seq(0.0, 1.0, length.out = 5)),
                      name = "Household Wealth Quintile") +
  scale_shape_manual(values = c(19, 25, 22, 23, 17), name = "Household Wealth Quintile")
diabpredfig


# Copy whatever is on the active graphics device into a PDF, then close it.
# The file name carries "women" (matching the CSV files of this chunk) so the
# figure is not written over the same-named PDF saved by the other chunks.
dev.copy(pdf, 'reg controlled wealth women 3 28.pdf')
dev.off()




```




```{r regression figure S3 wealth men }




#Regression figure for aware wealth quintile#

# Aware outcome, men: keep rows with the outcome and every listed covariate
# observed, attach response-scale predictions and their standard errors from
# multiv_feglmaware, and drop the remaining augment() diagnostics.
# NOTE(review): the sex-specific subset is passed as augment()'s data for a
# model object that is also used with other subsets in this file -- confirm
# the model was fitted on exactly these rows.
dhs_nomiss_diabetic_only_aware_regress <- dhs_nomiss_diabetic_only_men %>%
  filter(!is.na(diabetic_aware_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
         !is.na(wealth_quintile_rurb_lab), !is.na(educat), !is.na(urban), !is.na(d_id)) %>%
  augment(multiv_feglmaware, ., type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted,
         se_diab = .se.fit) %>%
  dplyr::select(-c(.resid, .hat, .sigma, .cooksd, .std.resid, .fitted, .se.fit))


# Plotting table: mean predicted probability and mean standard error for each
# age group x wealth quintile x urban_lab cell, plus +/- 1.96 * SE limits.
# NOTE(review): the limits use the mean of per-row standard errors, which is
# not the standard error of the cell mean -- confirm this is intended.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_aware_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_aware_regress,
  FUN = mean
)
fig_aware <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "wealth_quintile_rurb_lab", "urban_lab")) %>%
  mutate(lowerci = prob_diab - (1.96 * se_diab),
         upperci = prob_diab + (1.96 * se_diab))


write.csv(fig_aware, file = "regressionfigure_aware_wealth men 3 28.csv")


# Error-bar limits are kept only for rows whose wealth label equals "x"; every
# other row gets NA limits. alphaindic is a 1/0 factor flagging the same rows.
# NOTE(review): "x" looks like a placeholder for the quintile labels whose
# error bars should stay visible -- confirm the real labels.
fig_aware <- mutate(
  fig_aware,
  lowerci_alt = ifelse(wealth_quintile_rurb_lab == "x", lowerci, NA),
  upperci_alt = ifelse(wealth_quintile_rurb_lab == "x", upperci, NA),
  alphaindic = as.factor(ifelse(wealth_quintile_rurb_lab == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) `rgbcol` with the HSV value (brightness) channel replaced
# by `v`, as hex colour strings. Hue and saturation are left unchanged.
brightness <- function(rgbcol, v) {
  hsv_channels <- as.data.frame(t(rgb2hsv(col2rgb(rgbcol))))
  hsv_args <- as.list(hsv_channels)
  # Overwrite the "v" channel, then hand h/s/v to hsv() as named arguments.
  hsv_args[["v"]] <- v
  do.call(hsv, hsv_args)
}

# Figure: mean predicted probability (in %) by age group, one point per wealth
# quintile (colour and shape), one panel per urban_lab level. Error bars are
# drawn only where lowerci_alt / upperci_alt are not NA.
# `fun` replaces the `fun.y` argument of stat_summary(), which ggplot2
# deprecated in version 3.3.0.
diabpredfig <- fig_aware %>%
  ggplot() +
  stat_summary(aes(y = 100 * prob_diab, x = age_grpOR,
                   color = wealth_quintile_rurb_lab, shape = wealth_quintile_rurb_lab),
               fun = "mean", size = 3.0, geom = "point") +
  geom_errorbar(aes(ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, x = age_grpOR,
                    color = wealth_quintile_rurb_lab),
                width = 0.3, show.legend = FALSE) +
  facet_wrap(~urban_lab) +
  # Fixed aspect ratio; the probability axis is limited to 0-80 %.
  coord_fixed(ylim = c(0, 80), ratio = 6 / 80, expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y",
       y = "Probability, %",
       fill = "") +
  theme(axis.text.y = element_text(size = 18, family = "Times"),
        axis.text.x = element_text(size = 18, angle = 45, hjust = 1, family = "Times"),
        axis.title = element_text(size = 21, face = "bold", family = "Times"),
        legend.title = element_text(size = 18, family = "Times"),
        legend.text = element_text(size = 18, family = "Times"),
        strip.text = element_text(size = 18, family = "Times"),
        panel.spacing = unit(2.5, "lines"),
        axis.title.x = element_text(margin = margin(t = 20)),
        axis.title.y = element_text(margin = margin(r = 20)),
        plot.title = element_text(size = 24, face = "bold", family = "Times")) +
  # Five shades of red, from v = 0 (black) to v = 1 (full brightness).
  scale_colour_manual(values = brightness("red", seq(0.0, 1.0, length.out = 5)),
                      name = "Household Wealth Quintile") +
  scale_shape_manual(values = c(19, 25, 22, 23, 17), name = "Household Wealth Quintile")
diabpredfig

#dev.copy(pdf,'reg aware wealth 3 28.pdf')
#dev.off()


#Regression figure for treated wealth#




# Treated outcome, men: keep rows with the outcome and every listed covariate
# observed, attach response-scale predictions and their standard errors from
# multiv_feglmtreated, and drop the remaining augment() diagnostics.
# NOTE(review): the sex-specific subset is passed as augment()'s data for a
# model object that is also used with other subsets in this file -- confirm
# the model was fitted on exactly these rows.
dhs_nomiss_diabetic_only_treated_regress <- dhs_nomiss_diabetic_only_men %>%
  filter(!is.na(diabetic_treated_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
         !is.na(wealth_quintile_rurb), !is.na(educat), !is.na(urban), !is.na(d_id)) %>%
  augment(multiv_feglmtreated, ., type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted,
         se_diab = .se.fit) %>%
  dplyr::select(-c(.resid, .hat, .sigma, .cooksd, .std.resid, .fitted, .se.fit))


# Plotting table: mean predicted probability and mean standard error for each
# age group x wealth quintile x urban_lab cell, plus +/- 1.96 * SE limits.
# NOTE(review): the limits use the mean of per-row standard errors, which is
# not the standard error of the cell mean -- confirm this is intended.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_treated_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_treated_regress,
  FUN = mean
)
fig_treated <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "wealth_quintile_rurb_lab", "urban_lab")) %>%
  mutate(lowerci = prob_diab - (1.96 * se_diab),
         upperci = prob_diab + (1.96 * se_diab))

write.csv(fig_treated, file = "regressionfigure_treated wealth men  3 28.csv")


# Error-bar limits are kept only for rows whose wealth label equals "x"; every
# other row gets NA limits. alphaindic is a 1/0 factor flagging the same rows.
# NOTE(review): "x" looks like a placeholder for the quintile labels whose
# error bars should stay visible -- confirm the real labels.
fig_treated <- mutate(
  fig_treated,
  lowerci_alt = ifelse(wealth_quintile_rurb_lab == "x", lowerci, NA),
  upperci_alt = ifelse(wealth_quintile_rurb_lab == "x", upperci, NA),
  alphaindic = as.factor(ifelse(wealth_quintile_rurb_lab == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) `rgbcol` with the HSV value (brightness) channel replaced
# by `v`, as hex colour strings. Hue and saturation are left unchanged.
brightness <- function(rgbcol, v) {
  hsv_channels <- as.data.frame(t(rgb2hsv(col2rgb(rgbcol))))
  hsv_args <- as.list(hsv_channels)
  # Overwrite the "v" channel, then hand h/s/v to hsv() as named arguments.
  hsv_args[["v"]] <- v
  do.call(hsv, hsv_args)
}

# Figure: mean predicted probability (in %) by age group, one point per wealth
# quintile (colour and shape), one panel per urban_lab level. Error bars are
# drawn only where lowerci_alt / upperci_alt are not NA.
# `fun` replaces the `fun.y` argument of stat_summary(), which ggplot2
# deprecated in version 3.3.0.
diabpredfig <- fig_treated %>%
  ggplot() +
  stat_summary(aes(y = 100 * prob_diab, x = age_grpOR,
                   color = wealth_quintile_rurb_lab, shape = wealth_quintile_rurb_lab),
               fun = "mean", size = 3.0, geom = "point") +
  geom_errorbar(aes(ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, x = age_grpOR,
                    color = wealth_quintile_rurb_lab),
                width = 0.3, show.legend = FALSE) +
  facet_wrap(~urban_lab) +
  # Fixed aspect ratio; the probability axis is limited to 0-80 %.
  coord_fixed(ylim = c(0, 80), ratio = 6 / 80, expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y",
       y = "Probability, %",
       fill = "") +
  theme(axis.text.y = element_text(size = 18, family = "Times"),
        axis.text.x = element_text(size = 18, angle = 45, hjust = 1, family = "Times"),
        axis.title = element_text(size = 21, face = "bold", family = "Times"),
        legend.title = element_text(size = 18, family = "Times"),
        legend.text = element_text(size = 18, family = "Times"),
        strip.text = element_text(size = 18, family = "Times"),
        panel.spacing = unit(2.5, "lines"),
        axis.title.x = element_text(margin = margin(t = 20)),
        axis.title.y = element_text(margin = margin(r = 20)),
        plot.title = element_text(size = 24, face = "bold", family = "Times")) +
  # Five shades of red, from v = 0 (black) to v = 1 (full brightness).
  scale_colour_manual(values = brightness("red", seq(0.0, 1.0, length.out = 5)),
                      name = "Household Wealth Quintile") +
  scale_shape_manual(values = c(19, 25, 22, 23, 17), name = "Household Wealth Quintile")
diabpredfig

#dev.copy(pdf,'reg treated wealth  3 28.pdf')
#dev.off()
#
#Regression figure for controlled wealth quintile #



# Controlled outcome, men: keep rows with the outcome and every listed
# covariate observed, attach response-scale predictions and their standard
# errors from multiv_feglmcontrolled, and drop the remaining augment()
# diagnostics.
# NOTE(review): the sex-specific subset is passed as augment()'s data for a
# model object that is also used with other subsets in this file -- confirm
# the model was fitted on exactly these rows.
dhs_nomiss_diabetic_only_controlled_regress <- dhs_nomiss_diabetic_only_men %>%
  filter(!is.na(diabetic_controlled_dbl), !is.na(sex), !is.na(age_grpOR), !is.na(married),
         !is.na(wealth_quintile_rurb), !is.na(educat), !is.na(urban), !is.na(d_id)) %>%
  augment(multiv_feglmcontrolled, ., type.predict = "response", se.fit = TRUE) %>%
  mutate(prob_diab = .fitted,
         se_diab = .se.fit) %>%
  dplyr::select(-c(.resid, .hat, .sigma, .cooksd, .std.resid, .fitted, .se.fit))


# Plotting table: mean predicted probability and mean standard error for each
# age group x wealth quintile x urban_lab cell, plus +/- 1.96 * SE limits.
# NOTE(review): the limits use the mean of per-row standard errors, which is
# not the standard error of the cell mean -- confirm this is intended.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_controlled_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_diabetic_only_controlled_regress,
  FUN = mean
)
fig_controlled <- pmeans_diab %>%
  left_join(p_standerr_diab, by = c("age_grpOR", "wealth_quintile_rurb_lab", "urban_lab")) %>%
  mutate(lowerci = prob_diab - (1.96 * se_diab),
         upperci = prob_diab + (1.96 * se_diab))

write.csv(fig_controlled, file = "regressionfigure_controlled wealth men 3 28.csv")


# Error-bar limits are kept only for rows whose wealth label equals "x"; every
# other row gets NA limits. alphaindic is a 1/0 factor flagging the same rows.
# NOTE(review): "x" looks like a placeholder for the quintile labels whose
# error bars should stay visible -- confirm the real labels.
fig_controlled <- mutate(
  fig_controlled,
  lowerci_alt = ifelse(wealth_quintile_rurb_lab == "x", lowerci, NA),
  upperci_alt = ifelse(wealth_quintile_rurb_lab == "x", upperci, NA),
  alphaindic = as.factor(ifelse(wealth_quintile_rurb_lab == "x", 1, 0))
)


# Draw the actual figure
# Return colour(s) `rgbcol` with the HSV value (brightness) channel replaced
# by `v`, as hex colour strings. Hue and saturation are left unchanged.
brightness <- function(rgbcol, v) {
  hsv_channels <- as.data.frame(t(rgb2hsv(col2rgb(rgbcol))))
  hsv_args <- as.list(hsv_channels)
  # Overwrite the "v" channel, then hand h/s/v to hsv() as named arguments.
  hsv_args[["v"]] <- v
  do.call(hsv, hsv_args)
}

# Figure: mean predicted probability (in %) by age group, one point per wealth
# quintile (colour and shape), one panel per urban_lab level. Error bars are
# drawn only where lowerci_alt / upperci_alt are not NA.
# `fun` replaces the `fun.y` argument of stat_summary(), which ggplot2
# deprecated in version 3.3.0.
diabpredfig <- fig_controlled %>%
  ggplot() +
  stat_summary(aes(y = 100 * prob_diab, x = age_grpOR,
                   color = wealth_quintile_rurb_lab, shape = wealth_quintile_rurb_lab),
               fun = "mean", size = 3.0, geom = "point") +
  geom_errorbar(aes(ymin = 100 * lowerci_alt, ymax = 100 * upperci_alt, x = age_grpOR,
                    color = wealth_quintile_rurb_lab),
                width = 0.3, show.legend = FALSE) +
  facet_wrap(~urban_lab) +
  # Fixed aspect ratio; the probability axis is limited to 0-80 %.
  coord_fixed(ylim = c(0, 80), ratio = 6 / 80, expand = TRUE) +
  theme_classic() +
  labs(x = "Age Group, y",
       y = "Probability, %",
       fill = "") +
  theme(axis.text.y = element_text(size = 18, family = "Times"),
        axis.text.x = element_text(size = 18, angle = 45, hjust = 1, family = "Times"),
        axis.title = element_text(size = 21, face = "bold", family = "Times"),
        legend.title = element_text(size = 18, family = "Times"),
        legend.text = element_text(size = 18, family = "Times"),
        strip.text = element_text(size = 18, family = "Times"),
        panel.spacing = unit(2.5, "lines"),
        axis.title.x = element_text(margin = margin(t = 20)),
        axis.title.y = element_text(margin = margin(r = 20)),
        plot.title = element_text(size = 24, face = "bold", family = "Times")) +
  # Five shades of red, from v = 0 (black) to v = 1 (full brightness).
  scale_colour_manual(values = brightness("red", seq(0.0, 1.0, length.out = 5)),
                      name = "Household Wealth Quintile") +
  scale_shape_manual(values = c(19, 25, 22, 23, 17), name = "Household Wealth Quintile")
diabpredfig


# Copy whatever is on the active graphics device into a PDF, then close it.
# The file name carries "men" (matching the CSV files of this chunk) so the
# figure is not written over the same-named PDF saved by the other chunks.
dev.copy(pdf, 'reg controlled wealth men 3 28.pdf')
dev.off()




```


```{r prevalence estimates}

####### crude diabetes prev by sex and state
# Survey-weighted crude diabetes prevalence (%) with confidence limits for
# each female_lab x ex_state_ind cell, written to CSV.
# The design argument is `strata`; the earlier spelling `stratum =` matched no
# argument of as_survey_design(), fell into `...`, and left the design
# unstratified.
# NOTE(review): with strata active, PSU ids must be unique across strata (or
# nest = TRUE is needed) and single-PSU strata can raise errors -- confirm
# for this dataset.
svy_tot <- dhs_nomiss_noNAinpsu %>%
  as_survey_design(strata = stratum,
                   ids = c(psuid, hh_id),
                   weights = p_wt_new,
                   variables = c(female_lab, ex_state_ind, ex_diab_broad_ind_dbl))

prevtot <- svy_tot %>%
  group_by(female_lab, ex_state_ind) %>%
  summarise(diab_prop = survey_mean(ex_diab_broad_ind_dbl, proportion = TRUE, vartype = "ci")) %>%
  # Express the estimate and its interval limits as percentages.
  mutate(diab = 100 * diab_prop,
         diab_low = 100 * diab_prop_low,
         diab_upp = 100 * diab_prop_upp)

write_csv(prevtot, "stateprevdiabbysexandstate_2018-02-22.csv")

##### Crude controlled diabetes prev by sex and state

# Survey-weighted share (%) of diabetic respondents with controlled diabetes,
# with confidence limits, for each female_lab x ex_state_ind cell.
# The design argument is `strata`; the earlier spelling `stratum =` matched no
# argument of as_survey_design(), fell into `...`, and left the design
# unstratified.
# NOTE(review): with strata active, PSU ids must be unique across strata (or
# nest = TRUE is needed) and single-PSU strata can raise errors -- confirm
# for this dataset.
svy_controlled <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(strata = stratum,
                   ids = c(psuid, hh_id),
                   weights = p_wt_new,
                   variables = c(diabetic_controlled_dbl, female_lab, ex_state_ind))

prevtot <- svy_controlled %>%
  group_by(female_lab, ex_state_ind) %>%
  summarise(diab_controlled_prop = survey_mean(diabetic_controlled_dbl, proportion = TRUE, vartype = "ci")) %>%
  # Express the estimate and its interval limits as percentages.
  mutate(diab_controlled = 100 * diab_controlled_prop,
         diab_low = 100 * diab_controlled_prop_low,
         diab_upp = 100 * diab_controlled_prop_upp)

write_csv(prevtot, "stateprevcontrolledsexandstate_2018-02-22.csv")

#### Crude treated diabetes prev by sex and state


# Survey-weighted share (%) of diabetic respondents who are treated, with
# confidence limits, for each female_lab x ex_state_ind cell.
# The design argument is `strata`; the earlier spelling `stratum =` matched no
# argument of as_survey_design(), fell into `...`, and left the design
# unstratified.
# NOTE(review): with strata active, PSU ids must be unique across strata (or
# nest = TRUE is needed) and single-PSU strata can raise errors -- confirm
# for this dataset.
svy_treated <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(strata = stratum,
                   ids = c(psuid, hh_id),
                   weights = p_wt_new,
                   variables = c(diabetic_treated_dbl, female_lab, ex_state_ind))

prevtot <- svy_treated %>%
  group_by(female_lab, ex_state_ind) %>%
  summarise(diab_treated_prop = survey_mean(diabetic_treated_dbl, proportion = TRUE, vartype = "ci")) %>%
  # Express the estimate and its interval limits as percentages.
  mutate(diab_treated = 100 * diab_treated_prop,
         diab_low = 100 * diab_treated_prop_low,
         diab_upp = 100 * diab_treated_prop_upp)

write_csv(prevtot, "stateprevtreatedbysexandstate_2018-03-02.csv")

#### Crude aware diabetes prev by sex and state


# Survey-weighted share (%) of diabetic respondents who are aware of their
# diabetes, with confidence limits, for each female_lab x ex_state_ind cell.
# The design argument is `strata`; the earlier spelling `stratum =` matched no
# argument of as_survey_design(), fell into `...`, and left the design
# unstratified.
# NOTE(review): with strata active, PSU ids must be unique across strata (or
# nest = TRUE is needed) and single-PSU strata can raise errors -- confirm
# for this dataset.
svy_aware <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(strata = stratum,
                   ids = c(psuid, hh_id),
                   weights = p_wt_new,
                   variables = c(diabetic_aware_dbl, female_lab, ex_state_ind))

prevtot <- svy_aware %>%
  group_by(female_lab, ex_state_ind) %>%
  summarise(diab_aware_prop = survey_mean(diabetic_aware_dbl, proportion = TRUE, vartype = "ci")) %>%
  # Express the estimate and its interval limits as percentages.
  mutate(diab_aware = 100 * diab_aware_prop,
         diab_low = 100 * diab_aware_prop_low,
         diab_upp = 100 * diab_aware_prop_upp)

write_csv(prevtot, "stateprevawarebysexandstate_2018-02-22.csv")

####### crude diabetes prev by sex 
# Survey-weighted crude diabetes prevalence (%) with confidence limits for
# each female_lab level, written to CSV.
# The design argument is `strata`; the earlier spelling `stratum =` matched no
# argument of as_survey_design(), fell into `...`, and left the design
# unstratified.
# NOTE(review): with strata active, PSU ids must be unique across strata (or
# nest = TRUE is needed) and single-PSU strata can raise errors -- confirm
# for this dataset.
svy_tot <- dhs_nomiss_noNAinpsu %>%
  as_survey_design(strata = stratum,
                   ids = c(psuid, hh_id),
                   weights = p_wt_new,
                   variables = c(ex_diab_broad_ind, female_lab, ex_state_ind, ex_diab_broad_ind_dbl))

prevtot <- svy_tot %>%
  group_by(female_lab) %>%
  summarise(diab_prop = survey_mean(ex_diab_broad_ind_dbl, proportion = TRUE, vartype = "ci")) %>%
  # Express the estimate and its interval limits as percentages.
  mutate(diab = 100 * diab_prop,
         diab_low = 100 * diab_prop_low,
         diab_upp = 100 * diab_prop_upp)

write_csv(prevtot, "stateprevdiabbysexonly_2018-02-22.csv")

##### Crude controlled diabetes prev by sex

# Survey-weighted share (%) of diabetic respondents with controlled diabetes,
# with confidence limits, for each female_lab level.
# The design argument is `strata`; the earlier spelling `stratum =` matched no
# argument of as_survey_design(), fell into `...`, and left the design
# unstratified.
# NOTE(review): with strata active, PSU ids must be unique across strata (or
# nest = TRUE is needed) and single-PSU strata can raise errors -- confirm
# for this dataset.
svy_controlled <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(strata = stratum,
                   ids = c(psuid, hh_id),
                   weights = p_wt_new,
                   variables = c(diabetic_controlled_dbl, female_lab, ex_state_ind))

prevtot <- svy_controlled %>%
  group_by(female_lab) %>%
  summarise(diab_controlled_prop = survey_mean(diabetic_controlled_dbl, proportion = TRUE, vartype = "ci")) %>%
  # Express the estimate and its interval limits as percentages.
  mutate(diab_controlled = 100 * diab_controlled_prop,
         diab_low = 100 * diab_controlled_prop_low,
         diab_upp = 100 * diab_controlled_prop_upp)

write_csv(prevtot, "stateprevcontrolledsexonly_2018-04-20.csv")

#### Crude treated diabetes prev by sex 


# Survey-weighted share (%) of diabetic respondents who are treated, with
# confidence limits, for each female_lab level.
# The design argument is `strata`; the earlier spelling `stratum =` matched no
# argument of as_survey_design(), fell into `...`, and left the design
# unstratified.
# NOTE(review): with strata active, PSU ids must be unique across strata (or
# nest = TRUE is needed) and single-PSU strata can raise errors -- confirm
# for this dataset.
svy_treated <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(strata = stratum,
                   ids = c(psuid, hh_id),
                   weights = p_wt_new,
                   variables = c(diabetic_treated_dbl, female_lab, ex_state_ind))

prevtot <- svy_treated %>%
  group_by(female_lab) %>%
  summarise(diab_treated_prop = survey_mean(diabetic_treated_dbl, proportion = TRUE, vartype = "ci")) %>%
  # Express the estimate and its interval limits as percentages.
  mutate(diab_treated = 100 * diab_treated_prop,
         diab_low = 100 * diab_treated_prop_low,
         diab_upp = 100 * diab_treated_prop_upp)

write_csv(prevtot, "stateprevtreatedbysexonly_2018-02-22.csv")



#### Crude aware diabetes prev by sex 


svy_aware <- dhs_nomiss_diabetic_only_noNAinpsu %>% 
 as_survey_design(stratum = stratum,
                   ids = c(psuid, hh_id),
                   weights = p_wt_new,
                   variables = c(diabetic_aware_dbl, female_lab, ex_state_ind))

prevtot <- svy_aware %>%
  group_by(female_lab) %>% 
  summarise(diab_aware_prop = survey_mean(diabetic_aware_dbl, proportion=TRUE, vartype = "ci")) %>% 
  mutate(diab_aware = 100*diab_aware_prop,
         diab_low = 100*diab_aware_prop_low,
         diab_upp = 100*diab_aware_prop_upp)


write_csv(prevtot, "stateprevawarebysexonly_2018-02-22.csv")

###
# NOTE: `as_survey_design()` takes `strata`, not `stratum`; the former
# `stratum = stratum` was absorbed by `...` and ignored, so the designs were
# built without stratification. `strata = stratum` changes the confidence
# intervals, not the point estimates.
# NOTE(review): if a stratum ends up with a single PSU, survey may stop with a
# lonely-PSU error; choose options(survey.lonely.psu) deliberately.

####### crude diabetes prev by age
svy_tot <- dhs_nomiss_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(ex_diab_broad_ind_dbl, age_grpOR))

prevtot <- svy_tot %>%
  group_by(age_grpOR) %>%
  summarise(
    diab_prop = survey_mean(ex_diab_broad_ind_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab = 100 * diab_prop,
         diab_low = 100 * diab_prop_low,
         diab_upp = 100 * diab_prop_upp)

write_csv(prevtot, "stateprevdiabby age only _2018-03-29.csv")

####### crude diabetes prev by sex and age
svy_tot <- dhs_nomiss_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(ex_diab_broad_ind_dbl, age_grpOR, female_lab))

prevtot <- svy_tot %>%
  group_by(female_lab, age_grpOR) %>%
  summarise(
    diab_prop = survey_mean(ex_diab_broad_ind_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab = 100 * diab_prop,
         diab_low = 100 * diab_prop_low,
         diab_upp = 100 * diab_prop_upp)

write_csv(prevtot, "stateprevdiabbysexandage_2018-03-29.csv")

##### Crude controlled / treated / aware estimates by sex and age group
# NOTE: `as_survey_design()` takes `strata`, not `stratum`; the former
# `stratum = stratum` was absorbed by `...` and ignored, so the designs were
# built without stratification. `strata = stratum` changes the confidence
# intervals, not the point estimates.
# NOTE(review): if a subset leaves a stratum with a single PSU, survey may
# stop with a lonely-PSU error; choose options(survey.lonely.psu) deliberately.

##### Crude controlled diabetes prev by sex and age
svy_controlled <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_controlled_dbl, age_grpOR, female_lab,
                                 ex_state_ind))

prevtot <- svy_controlled %>%
  group_by(female_lab, age_grpOR) %>%
  summarise(
    diab_controlled_prop = survey_mean(diabetic_controlled_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_controlled = 100 * diab_controlled_prop,
         diab_low = 100 * diab_controlled_prop_low,
         diab_upp = 100 * diab_controlled_prop_upp)

write_csv(prevtot, "stateprevcontrolledsexandage_2018-03-29.csv")

#### Crude treated diabetes prev by sex and age
svy_treated <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_treated_dbl, age_grpOR, female_lab,
                                 ex_state_ind))

prevtot <- svy_treated %>%
  group_by(female_lab, age_grpOR) %>%
  summarise(
    diab_treated_prop = survey_mean(diabetic_treated_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_treated = 100 * diab_treated_prop,
         diab_low = 100 * diab_treated_prop_low,
         diab_upp = 100 * diab_treated_prop_upp)

write_csv(prevtot, "stateprevtreatedbysexandage_2018-03-29.csv")

#### Crude aware diabetes prev by sex and age
svy_aware <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_aware_dbl, age_grpOR, female_lab,
                                 ex_state_ind))

prevtot <- svy_aware %>%
  group_by(female_lab, age_grpOR) %>%
  summarise(
    diab_aware_prop = survey_mean(diabetic_aware_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_aware = 100 * diab_aware_prop,
         diab_low = 100 * diab_aware_prop_low,
         diab_upp = 100 * diab_aware_prop_upp)

write_csv(prevtot, "stateprevawarebysexandage_2018-03-10.csv")



##### Conditional cascade steps: treated among the "aware" subset and
##### controlled among the "treated" subset, by sex and overall.
# NOTE: `as_survey_design()` takes `strata`, not `stratum`; the former
# `stratum = stratum` was absorbed by `...` and ignored, so the designs were
# built without stratification. `strata = stratum` changes the confidence
# intervals, not the point estimates.
# NOTE(review): these are the smallest subsets in this section; if a stratum
# is left with a single PSU, survey may stop with a lonely-PSU error. Choose
# options(survey.lonely.psu) deliberately.
# The design object keeps the name `svy_aware` throughout (as before), even
# where it is built from the "treated only" data.

#####Crude prevalence of treated among aware diabetics group by sex
svy_aware <- dhs_nomiss_diabetic_and_aware_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_treated_dbl, female_lab, ex_state_ind))

prevtot <- svy_aware %>%
  group_by(female_lab) %>%
  summarise(
    diab_treated_prop = survey_mean(diabetic_treated_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_treated = 100 * diab_treated_prop,
         diab_low = 100 * diab_treated_prop_low,
         diab_upp = 100 * diab_treated_prop_upp)

write_csv(prevtot, "prevtreatedofawarebysex_2018-02-24.csv")

######Crude prevalence of controlled among treated diabetics by sex
svy_aware <- dhs_nomiss_diabetic_and_treated_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_controlled_dbl, female_lab, ex_state_ind))

prevtot <- svy_aware %>%
  group_by(female_lab) %>%
  summarise(
    diab_controlled_prop = survey_mean(diabetic_controlled_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_controlled = 100 * diab_controlled_prop,
         diab_low = 100 * diab_controlled_prop_low,
         diab_upp = 100 * diab_controlled_prop_upp)

write_csv(prevtot, "prevcontrolledoftreatedbysex_2018-02-24.csv")

#####Crude prevalence of treated among aware diabetics group
svy_aware <- dhs_nomiss_diabetic_and_aware_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_treated_dbl))

prevtot <- svy_aware %>%
  summarise(
    diab_treated_prop = survey_mean(diabetic_treated_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_treated = 100 * diab_treated_prop,
         diab_low = 100 * diab_treated_prop_low,
         diab_upp = 100 * diab_treated_prop_upp)

write_csv(prevtot, "prevtreatedofaware_2018-03-09.csv")

######Crude prevalence of controlled among treated diabetics
svy_aware <- dhs_nomiss_diabetic_and_treated_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_controlled_dbl))

prevtot <- svy_aware %>%
  summarise(
    diab_controlled_prop = survey_mean(diabetic_controlled_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_controlled = 100 * diab_controlled_prop,
         diab_low = 100 * diab_controlled_prop_low,
         diab_upp = 100 * diab_controlled_prop_upp)

write_csv(prevtot, "prevcontrolledoftreated_2018-03-09.csv")


##### Overall (ungrouped) controlled / treated / aware estimates
# NOTE: `as_survey_design()` takes `strata`, not `stratum`; the former
# `stratum = stratum` was absorbed by `...` and ignored, so the designs were
# built without stratification. `strata = stratum` changes the confidence
# intervals, not the point estimates.
# NOTE(review): if this subset leaves a stratum with a single PSU, survey may
# stop with a lonely-PSU error; choose options(survey.lonely.psu) deliberately.

##### Crude controlled diabetes prev
svy_controlled <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_controlled_dbl))

prevtot <- svy_controlled %>%
  summarise(
    diab_controlled_prop = survey_mean(diabetic_controlled_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_controlled = 100 * diab_controlled_prop,
         diab_low = 100 * diab_controlled_prop_low,
         diab_upp = 100 * diab_controlled_prop_upp)

write_csv(prevtot, "prevcontrolled_2018-04-21.csv")

#### Crude treated diabetes prev
svy_treated <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_treated_dbl))

prevtot <- svy_treated %>%
  summarise(
    diab_treated_prop = survey_mean(diabetic_treated_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_treated = 100 * diab_treated_prop,
         diab_low = 100 * diab_treated_prop_low,
         diab_upp = 100 * diab_treated_prop_upp)

write_csv(prevtot, "prevtreated_2018-04-21.csv")

#### Crude aware diabetes prev
svy_aware <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_aware_dbl))

prevtot <- svy_aware %>%
  summarise(
    diab_aware_prop = survey_mean(diabetic_aware_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_aware = 100 * diab_aware_prop,
         diab_low = 100 * diab_aware_prop_low,
         diab_upp = 100 * diab_aware_prop_upp)

write_csv(prevtot, "prevaware_2018-04-21.csv")




#### Crude estimates by state (ex_state_ind)
# NOTE: `as_survey_design()` takes `strata`, not `stratum`; the former
# `stratum = stratum` was absorbed by `...` and ignored, so the designs were
# built without stratification. `strata = stratum` changes the confidence
# intervals, not the point estimates.
# NOTE(review): if a subset leaves a stratum with a single PSU, survey may
# stop with a lonely-PSU error; choose options(survey.lonely.psu) deliberately.

#### Crude diabetes prev by state
svy_diab <- dhs_nomiss_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(ex_diab_broad_ind_dbl, ex_state_ind))

# NOTE(review): the percent column is called `diab_` here but `diab` in the
# by-sex tables; kept as-is because it is the CSV header.
prevtot <- svy_diab %>%
  group_by(ex_state_ind) %>%
  summarise(
    diab_prop = survey_mean(ex_diab_broad_ind_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_ = 100 * diab_prop,
         diab_low = 100 * diab_prop_low,
         diab_upp = 100 * diab_prop_upp)

write_csv(prevtot, "stateprevdiabbystate_2018-04-21.csv")

#### Crude aware diabetes prev by state
svy_aware <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_aware_dbl, ex_state_ind))

prevtot <- svy_aware %>%
  group_by(ex_state_ind) %>%
  summarise(
    diab_aware_prop = survey_mean(diabetic_aware_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_aware = 100 * diab_aware_prop,
         diab_low = 100 * diab_aware_prop_low,
         diab_upp = 100 * diab_aware_prop_upp)

write_csv(prevtot, "stateprevawarebystate_2018-04-21.csv")

#### Crude treated diabetes prev by state
svy_treated <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_treated_dbl, ex_state_ind))

prevtot <- svy_treated %>%
  group_by(ex_state_ind) %>%
  summarise(
    diab_treated_prop = survey_mean(diabetic_treated_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_treated = 100 * diab_treated_prop,
         diab_low = 100 * diab_treated_prop_low,
         diab_upp = 100 * diab_treated_prop_upp)

write_csv(prevtot, "stateprevtreatedbystate_2018-04-21.csv")

#### Crude controlled diabetes prev by state
svy_controlled <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_controlled_dbl, ex_state_ind))

prevtot <- svy_controlled %>%
  group_by(ex_state_ind) %>%
  summarise(
    diab_controlled_prop = survey_mean(diabetic_controlled_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_controlled = 100 * diab_controlled_prop,
         diab_low = 100 * diab_controlled_prop_low,
         diab_upp = 100 * diab_controlled_prop_upp)

write_csv(prevtot, "stateprevcontrolledbystate_2018-04-21.csv")

#### Crude estimates by urban/rural (urban_lab)
# NOTE: `as_survey_design()` takes `strata`, not `stratum`; the former
# `stratum = stratum` was absorbed by `...` and ignored, so the designs were
# built without stratification. `strata = stratum` changes the confidence
# intervals, not the point estimates.
# NOTE(review): if a subset leaves a stratum with a single PSU, survey may
# stop with a lonely-PSU error; choose options(survey.lonely.psu) deliberately.

#### Crude diabetes prev by urban/rural
svy_diab <- dhs_nomiss_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(ex_diab_broad_ind_dbl, urban_lab))

# The percent column keeps its existing name `diab_` (it is the CSV header).
prevtot <- svy_diab %>%
  group_by(urban_lab) %>%
  summarise(
    diab_prop = survey_mean(ex_diab_broad_ind_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_ = 100 * diab_prop,
         diab_low = 100 * diab_prop_low,
         diab_upp = 100 * diab_prop_upp)

write_csv(prevtot, "prevdiabbyurbanrural_2018-04-21.csv")

#### Crude aware diabetes prev by rural/urban
svy_aware <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_aware_dbl, urban_lab))

prevtot <- svy_aware %>%
  group_by(urban_lab) %>%
  summarise(
    diab_aware_prop = survey_mean(diabetic_aware_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_aware = 100 * diab_aware_prop,
         diab_low = 100 * diab_aware_prop_low,
         diab_upp = 100 * diab_aware_prop_upp)

write_csv(prevtot, "prevawarebyurbanrural_2018-04-21.csv")

#### Crude treated diabetes prev by urban/rural
svy_treated <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_treated_dbl, urban_lab))

prevtot <- svy_treated %>%
  group_by(urban_lab) %>%
  summarise(
    diab_treated_prop = survey_mean(diabetic_treated_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_treated = 100 * diab_treated_prop,
         diab_low = 100 * diab_treated_prop_low,
         diab_upp = 100 * diab_treated_prop_upp)

write_csv(prevtot, "prevtreatedbyurbanrural_2018-04-21.csv")

#### Crude controlled diabetes prev by urban/rural
svy_controlled <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_controlled_dbl, urban_lab))

prevtot <- svy_controlled %>%
  group_by(urban_lab) %>%
  summarise(
    diab_controlled_prop = survey_mean(diabetic_controlled_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_controlled = 100 * diab_controlled_prop,
         diab_low = 100 * diab_controlled_prop_low,
         diab_upp = 100 * diab_controlled_prop_upp)

write_csv(prevtot, "prevcontrolledbyurbanrural_2018-04-21.csv")

#### Crude diabetes estimates: overall and per age group
# NOTE: `as_survey_design()` takes `strata`, not `stratum`; the former
# `stratum = stratum` was absorbed by `...` and ignored, so the designs were
# built without stratification. `strata = stratum` changes the confidence
# intervals, not the point estimates.
# NOTE(review): if a stratum ends up with a single PSU, survey may stop with a
# lonely-PSU error; choose options(survey.lonely.psu) deliberately.
# The percent column keeps its existing name `diab_` (it is the CSV header).

#### Crude diabetes prev
svy_diab <- dhs_nomiss_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(ex_diab_broad_ind_dbl))

prevtot <- svy_diab %>%
  summarise(
    diab_prop = survey_mean(ex_diab_broad_ind_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_ = 100 * diab_prop,
         diab_low = 100 * diab_prop_low,
         diab_upp = 100 * diab_prop_upp)

write_csv(prevtot, "prevdiaboverall_2018-04-21.csv")

#### Crude diabetes prev per 5 year age group
svy_diab <- dhs_nomiss_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(ex_diab_broad_ind_dbl, age_grpOR))

prevtot <- svy_diab %>%
  group_by(age_grpOR) %>%
  summarise(
    diab_prop = survey_mean(ex_diab_broad_ind_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_ = 100 * diab_prop,
         diab_low = 100 * diab_prop_low,
         diab_upp = 100 * diab_prop_upp)

write_csv(prevtot, "prevdiaboverall per age grp_2018-04-24.csv")


#### Crude aware / treated / controlled estimates by age group (age_grpOR)
# NOTE: `as_survey_design()` takes `strata`, not `stratum`; the former
# `stratum = stratum` was absorbed by `...` and ignored, so the designs were
# built without stratification. `strata = stratum` changes the confidence
# intervals, not the point estimates.
# NOTE(review): if this subset leaves a stratum with a single PSU, survey may
# stop with a lonely-PSU error; choose options(survey.lonely.psu) deliberately.

#### Crude aware diabetes prev by 5 year age group
svy_aware <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_aware_dbl, age_grpOR))

prevtot <- svy_aware %>%
  group_by(age_grpOR) %>%
  summarise(
    diab_aware_prop = survey_mean(diabetic_aware_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_aware = 100 * diab_aware_prop,
         diab_low = 100 * diab_aware_prop_low,
         diab_upp = 100 * diab_aware_prop_upp)

write_csv(prevtot, "prevawareby5 year age group_2018-04-21.csv")

#### Crude treated diabetes prev by 5 year age group
svy_treated <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_treated_dbl, age_grpOR))

prevtot <- svy_treated %>%
  group_by(age_grpOR) %>%
  summarise(
    diab_treated_prop = survey_mean(diabetic_treated_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_treated = 100 * diab_treated_prop,
         diab_low = 100 * diab_treated_prop_low,
         diab_upp = 100 * diab_treated_prop_upp)

write_csv(prevtot, "prevtreatedby5 year age group_2018-04-21.csv")

#### Crude controlled diabetes prev by 5 year age group
svy_controlled <- dhs_nomiss_diabetic_only_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(diabetic_controlled_dbl, age_grpOR))

prevtot <- svy_controlled %>%
  group_by(age_grpOR) %>%
  summarise(
    diab_controlled_prop = survey_mean(diabetic_controlled_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_controlled = 100 * diab_controlled_prop,
         diab_low = 100 * diab_controlled_prop_low,
         diab_upp = 100 * diab_controlled_prop_upp)

write_csv(prevtot, "prevcontrolledby5 year age group_2018-04-21.csv")

#### Crude diabetes prev per 5 year age group and sex
# NOTE: `as_survey_design()` takes `strata`, not `stratum`; the former
# `stratum = stratum` was absorbed by `...` and ignored, so the design was
# built without stratification. `strata = stratum` changes the confidence
# intervals, not the point estimates.
# NOTE(review): if a stratum ends up with a single PSU, survey may stop with a
# lonely-PSU error; choose options(survey.lonely.psu) deliberately.
svy_diab <- dhs_nomiss_noNAinpsu %>%
  as_survey_design(ids = c(psuid, hh_id),
                   strata = stratum,
                   weights = p_wt_new,
                   variables = c(ex_diab_broad_ind_dbl, age_grpOR, female_lab))

# The percent column keeps its existing name `diab_` (it is the CSV header).
prevtot <- svy_diab %>%
  group_by(age_grpOR, female_lab) %>%
  summarise(
    diab_prop = survey_mean(ex_diab_broad_ind_dbl, proportion = TRUE, vartype = "ci")
  ) %>%
  mutate(diab_ = 100 * diab_prop,
         diab_low = 100 * diab_prop_low,
         diab_upp = 100 * diab_prop_upp)

write_csv(prevtot, "prevdiaboverall per age grp and sex_2018-04-24.csv")

```

summary(dhs_nomiss_diabetic_only$ex_diab_narrow_ind)
summary(dhs_nomiss_diabetic_only$hbg12)
summary(dhs_nomiss_diabetic_only$ex_dia_med_ind)


dhs_nomiss_diabetic_only <- mutate(dhs_nomiss_diabetic_only,
                                   treated_and_unaware = ifelse(hbg12==0 & ex_dia_med_ind==1, 1, 0))
dhs_nomiss_diabetic_only$treated_and_unaware <- as.factor(dhs_nomiss_diabetic_only$treated_and_unaware)
summary(dhs_nomiss_diabetic_only$treated_and_unaware)

dhs_nomiss_diabetic_aware_only <- filter(dhs_nomiss_diabetic_only, hbg12==1)

summary(dhs_nomiss_diabetic_aware_only$ex_dia_med_ind)


dhs_men <- filter(dhs, (female)==0) 
summary(dhs_men$p_wt_new)
dhs_women <- filter(dhs, (female)==1) 
summary(dhs_women$p_wt_new)



dhs <-mutate(dhs,p_wt_new = ifelse( sex==1, p_wt_new * (426049510/647451)*(1/1000),
                                ifelse(sex==0,p_wt_new*(356018118/110204)*(1/1000), NA)))

dhs_men_test <- filter(dhs, (sex)==0) 
summary(dhs_men_test$p_wt_new)
dhs_women_test <- filter(dhs, (sex)==1) 
summary(dhs_women_test$p_wt_new)


```{r check significance for figure 3}
######significance check


###create function

#ggplotRegression <- function (fit) {
  
#  require(ggplot2)
  
#  ggplot(fit$model, aes_string(x = names(fit$model)[2], y = names(fit$model)[1])) + 
#    geom_point() +
#    stat_smooth(method = "lm", col = "red") +
#    labs(title = paste("Adj R2 = ",signif(summary(fit)$adj.r.squared, 5),
#                       "Intercept =",signif(fit$coef[[1]],5 ),
#                       " Slope =",signif(fit$coef[[2]], 5),
#                       " P =",signif(summary(fit)$coef[2,4], 5)))
#}


####check significance


#fit1 <- lm(diab ~ awaremean, data=statemean.dat)
#ggplotRegression(fit1)

#fit1 <- lm(diab ~ treatedmean, data=statemean.dat)
#ggplotRegression(fit1)

#fit1 <- lm(diab ~ controlledmean, data=statemean.dat)
#ggplotRegression(fit1)


# Simple linear fits of state-level diabetes prevalence (diab) on each care
# indicator; the printed summary gives slope, p-value and R-squared.
# NOTE(review): statemean.dat is built in the next chunk of this file; confirm
# it already exists when this chunk is run.
lm(diab ~ awaremean, data = statemean.dat) %>% summary()
lm(diab ~ treatedmean, data = statemean.dat) %>% summary()
lm(diab ~ controlledmean, data = statemean.dat) %>% summary()

```

```{r figure 3 WITH treated states GENERAL diabetes definition}



##############Regression figure low diab prevalence low outcomes DIABETES DEFINITION WITH TREATED states

######Create diabetes care indicators in dhs_nomiss


#####SPECIAL DIABETES DEFINITION

# Build the figure-3 working copy of dhs_nomiss with the diabetes care
# indicators. Each indicator is kept twice: as a 0/1 factor (for the
# summary() counts) and as a numeric 0/1 `_dbl` column (for weighted means).
#
# The numeric columns are derived from the factor's level labels
# (as.numeric(as.character(f))) rather than as.numeric(f) - 1: the latter
# returns the level *position* minus one, which is only 0/1 when both levels
# occur in the data (a column of all 1s would silently become all 0s).

# Broad definition: 1 when hbg12 == 1 or ex_diab_narrow_ind == 1, else 0.
# Rows where that condition evaluates to NA are set to 0.
dhs_nomiss_fig3 <- dhs_nomiss %>%
  mutate(ex_diab_broad_ind = ifelse(hbg12 == 1 | ex_diab_narrow_ind == 1, 1, 0),
         ex_diab_broad_ind = replace(ex_diab_broad_ind, is.na(ex_diab_broad_ind), 0),
         ex_diab_broad_ind_dbl = as.numeric(ex_diab_broad_ind))


## aware diabetic as subset of diabetics
# NA results are left as NA here (unlike the treated indicator below).
dhs_nomiss_fig3 <- dhs_nomiss_fig3 %>%
  mutate(diabetic_aware = as.factor(ifelse(ex_diab_broad_ind == 1 & hbg12 == 1, 1, 0)),
         diabetic_aware_dbl = as.numeric(as.character(diabetic_aware)))

summary(dhs_nomiss_fig3$diabetic_aware)

## treated diabetic as subset of diabetics
# NA results (e.g. ex_dia_med_ind missing) are counted as not treated.
dhs_nomiss_fig3 <- dhs_nomiss_fig3 %>%
  mutate(diabetic_treated = ifelse(ex_diab_broad_ind == 1 & ex_dia_med_ind == 1, 1, 0),
         diabetic_treated = as.factor(replace(diabetic_treated, is.na(diabetic_treated), 0)),
         diabetic_treated_dbl = as.numeric(as.character(diabetic_treated)))

summary(dhs_nomiss_fig3$diabetic_treated)

## controlled diabetic as subset of diabetics
# Controlled = broad diabetes, on medication, and ex_diab_narrow_ind == 0.
# NA results are left as NA here.
dhs_nomiss_fig3 <- dhs_nomiss_fig3 %>%
  mutate(diabetic_controlled = as.factor(ifelse(
           ex_diab_broad_ind == 1 & ex_dia_med_ind == 1 & ex_diab_narrow_ind == 0, 1, 0)),
         diabetic_controlled_dbl = as.numeric(as.character(diabetic_controlled)))

summary(dhs_nomiss_fig3$diabetic_controlled)


################## Zones as per: https://en.wikipedia.org/wiki/Administrative_divisions_of_India
# Map each state (ex_state_ind) to one of six zones; anything not listed
# (including a missing state) gets NA. The result is stored as a factor.
#
# Delhi and the Andaman & Nicobar Islands are matched under both spellings
# used in this chunk: the original zone code tested "NCT of Delhi" and
# "Andaman and Nicobar", while the state-label recode and the exclusion
# filters use "Delhi" and "Andaman and Nicobar Islands".
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  zone = as.factor(case_when(
    # Northern
    ex_state_ind %in% c("Chandigarh", "NCT of Delhi", "Delhi", "Haryana",
                        "Himachal Pradesh", "Punjab", "Rajasthan",
                        "Jammu and Kashmir") ~ "North",
    # Northeastern
    ex_state_ind %in% c("Assam", "Arunachal Pradesh", "Manipur", "Meghalaya",
                        "Mizoram", "Nagaland", "Sikkim", "Tripura") ~ "Northeast",
    # Central
    ex_state_ind %in% c("Chhattisgarh", "Madhya Pradesh", "Uttarakhand",
                        "Uttar Pradesh") ~ "Central",
    # Eastern
    ex_state_ind %in% c("Bihar", "Jharkhand", "Odisha", "West Bengal") ~ "East",
    # Western
    ex_state_ind %in% c("Daman and Diu", "Goa", "Maharashtra", "Gujarat") ~ "West",
    # Southern
    ex_state_ind %in% c("Andaman and Nicobar", "Andaman and Nicobar Islands",
                        "Andhra Pradesh", "Karnataka", "Kerala", "Puducherry",
                        "Tamil Nadu", "Telangana") ~ "South",
    TRUE ~ NA_character_
  ))
)





####### Two-letter state labels for the plots
# state_lab is ex_state_ind with each listed level renamed to its short code
# (new = "old"); levels not listed keep their full name.
dhs_nomiss_fig3 <- dhs_nomiss_fig3 %>%
  mutate(
    state_lab = fct_recode(
      ex_state_ind,
      HP = "Himachal Pradesh",
      PB = "Punjab",
      CH = "Chandigarh",
      HR = "Haryana",
      DL = "Delhi",
      SK = "Sikkim",
      DD = "Daman and Diu",
      AR = "Arunachal Pradesh",
      NL = "Nagaland",
      MN = "Manipur",
      MZ = "Mizoram",
      TR = "Tripura",
      ML = "Meghalaya",
      WB = "West Bengal",
      MH = "Maharashtra",
      AP = "Andhra Pradesh",
      KA = "Karnataka",
      GA = "Goa",
      KL = "Kerala",
      PY = "Puducherry",
      TN = "Tamil Nadu",
      AN = "Andaman and Nicobar Islands",
      TS = "Telangana",
      UK = "Uttarakhand",
      RJ = "Rajasthan",
      UP = "Uttar Pradesh",
      BR = "Bihar",
      AS = "Assam",
      JK = "Jammu and Kashmir",
      GJ = "Gujarat",
      JH = "Jharkhand",
      OD = "Odisha",
      CT = "Chhattisgarh",
      MP = "Madhya Pradesh"
    )
  )



# Numeric copy of `urban`, then drop the seven territories excluded from the
# state-level figure. Rows with a missing ex_state_ind are dropped as well,
# exactly as the chain of `!=` filters did (NA != "x" is NA, which filter()
# discards).
#install.packages("spatstat")
dhs_nomiss_fig3 <- dhs_nomiss_fig3 %>%
  mutate(urban_dbl = as.numeric(urban)) %>%
  dplyr::filter(
    !is.na(ex_state_ind),
    !(ex_state_ind %in% c("Puducherry",
                          "Daman and Diu",
                          "Delhi",
                          "Dadra and Nagar Haveli",
                          "Andaman and Nicobar Islands",
                          "Lakshadweep",
                          "Chandigarh"))
  )

##### State-level estimates (no urban/rural split)

library(spatstat)

# One row per state, weighted by sworld_weight_india:
#   diab            weighted mean of ex_diab_broad_ind, in %
#   awaremean etc.  weighted mean of the indicator (in %) divided by diab and
#                   multiplied by 100, i.e. expressed relative to diab
#   prop_urban      weighted mean of urban_dbl
# The per-state values are attached to every row first and the first row of
# each state is then kept, so rows stay in order of first appearance.
statemean.dat <- dhs_nomiss_fig3 %>%
  group_by(ex_state_ind) %>%
  mutate(
    prop_urban = weighted.mean(urban_dbl, sworld_weight_india, na.rm = TRUE),
    diab = weighted.mean(ex_diab_broad_ind, sworld_weight_india, na.rm = TRUE) * 100,
    awaremean =
      weighted.mean(diabetic_aware_dbl, sworld_weight_india, na.rm = TRUE) * 100 / diab * 100,
    treatedmean =
      weighted.mean(diabetic_treated_dbl, sworld_weight_india, na.rm = TRUE) * 100 / diab * 100,
    controlledmean =
      weighted.mean(diabetic_controlled_dbl, sworld_weight_india, na.rm = TRUE) * 100 / diab * 100
  ) %>%
  filter(row_number() == 1) %>%
  ungroup() %>%
  dplyr::select(ex_state_ind, state_lab, zone, diab,
                awaremean, treatedmean, controlledmean, prop_urban)

write.csv(statemean.dat, "Colored reg fig estimates.csv")
#install.packages("ggplot2")
# Install ggrepel only when it is missing: an unconditional install.packages()
# re-downloads the package on every run/knit and fails when no CRAN mirror is
# configured in a non-interactive session.
if (!requireNamespace("ggrepel", quietly = TRUE)) {
  install.packages("ggrepel")
}
library(ggplot2)
library(ggrepel)

##### Figure 3 panels: state diabetes prevalence vs. each care indicator
# The three panels were three copies of the same ggplot call differing only in
# the x variable and x-axis label; they now share one helper.

# Scatter of `diab` (y) against one indicator column of `data` (x), with a
# straight-line fit, points coloured/shaped by zone and repelled state labels.
#   data       data frame with columns diab, zone, state_lab and `indicator`
#   indicator  name (string) of the column to put on the x axis
#   x_label    x-axis title
# Returns the ggplot object. Axis limits are fixed at 0-80 (x) and 0-10 (y);
# values outside these limits are dropped by the scales.
plot_state_indicator <- function(data, indicator, x_label) {
  data %>%
    ggplot(aes(y = diab, x = .data[[indicator]])) +
    geom_smooth(method = "glm", se = FALSE, color = "black") +
    geom_jitter(aes(color = zone, shape = zone), size = 2.5) +
    geom_text_repel(aes(label = state_lab)) +
    theme_classic() +
    labs(x = x_label,
         y = " Diabetes prevalence, in %",
         fill = "") +
    theme(axis.text = element_text(size = 20),
          axis.title = element_text(size = 22, face = "bold"),
          legend.text = element_text(size = 20),
          legend.title = element_blank(),
          axis.title.x = element_text(margin = margin(t = 20)),
          axis.title.y = element_text(margin = margin(r = 20)),
          strip.text.x = element_text(size = 22, face = "bold"),
          strip.background = element_blank(),
          panel.spacing = unit(2, "lines")) +
    scale_color_brewer(palette = "Dark2") +
    scale_y_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) +
    scale_x_continuous(breaks = c(20, 40, 60, 80), limits = c(0, 80)) +
    # 80/10 makes the 0-80 x range and 0-10 y range the same length on screen
    coord_fixed(80 / 10, expand = FALSE)
}

#####Aware
stateawarefig <- plot_state_indicator(statemean.dat, "awaremean", "Aware, in %")
stateawarefig


#######Treated
stateawarefig <- plot_state_indicator(statemean.dat, "treatedmean", "Treated, in %")
stateawarefig

#####Controlled
stateawarefig <- plot_state_indicator(statemean.dat, "controlledmean", "Controlled, in %")
stateawarefig

```








```{r figure 3 Districts GENERAL diabetes definition FACETWRAP BY AGE}





#####Age 15-29 years

# NOTE(review): this age subset is taken BEFORE the district filter below is
# applied to dhs_nomiss, so dhs_nomiss_1529 still contains every district.
# Confirm whether the subset should instead be taken after the filter.
dhs_nomiss_1529 <- dplyr::filter(dhs_nomiss, age < 30)


#dhs_nomiss_1529 <- dhs_nomiss_1529%>%
 # group_by(d_id)%>%
 # mutate(sumdiab = sum(ex_diab_broad_ind_dbl))

#dhs_nomiss_1529 <- filter(dhs_nomiss_1529, sumdiab>5)


# Keep only districts (d_id) whose ex_diab_broad_ind_dbl values sum to more
# than 5. This overwrites dhs_nomiss. The grouping is removed again so the
# overwritten data frame is not left silently grouped by d_id for later code.
dhs_nomiss <- dhs_nomiss %>%
  group_by(d_id) %>%
  mutate(sumdiab = sum(ex_diab_broad_ind_dbl)) %>%
  ungroup() %>%
  dplyr::filter(sumdiab > 5)

#####SPECIAL DIABETES DEFINITION
# Broad definition used for this figure: hbg12 == 1 OR ex_diab_narrow_ind == 1.
# Rows where that condition evaluates to NA are counted as non-diabetic (0).
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_1529,
  ex_diab_broad_ind = ifelse(hbg12 == 1 | ex_diab_narrow_ind == 1, 1, 0),
  ex_diab_broad_ind = replace(ex_diab_broad_ind, is.na(ex_diab_broad_ind), 0),
  ex_diab_broad_ind_dbl = as.numeric(ex_diab_broad_ind)
)

# Each cascade indicator below is stored twice: as a factor (for summary()
# counts) and as a 0/1 double (*_dbl, used in the weighted means).
# The factor gets explicit levels c(0, 1) and the double is recovered through
# as.character(): as.numeric() on a factor returns level codes, so the former
# `as.numeric(as.factor(x)) - 1` turned an indicator whose only observed value
# was 1 into all zeros.

##aware diabetic as subset of diabetics
# NOTE(review): NA results (broad == 1 with hbg12 missing) are NOT recoded to
# 0 here, unlike diabetic_treated below; they are dropped by na.rm = TRUE in
# the district means. Confirm this asymmetry is intended.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_aware = ifelse(ex_diab_broad_ind == 1 & hbg12 == 1, 1, 0)
)
dhs_nomiss_fig3$diabetic_aware <- factor(dhs_nomiss_fig3$diabetic_aware,
                                         levels = c(0, 1))
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_aware_dbl = as.numeric(as.character(diabetic_aware))
)

summary(dhs_nomiss_fig3$diabetic_aware)

##treated diabetic as subset diabetics
# Missing results are recoded to 0 (not treated) before the factor is built.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_treated = ifelse(ex_diab_broad_ind == 1 & ex_dia_med_ind == 1, 1, 0),
  diabetic_treated = replace(diabetic_treated, is.na(diabetic_treated), 0)
)
dhs_nomiss_fig3$diabetic_treated <- factor(dhs_nomiss_fig3$diabetic_treated,
                                           levels = c(0, 1))
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_treated_dbl = as.numeric(as.character(diabetic_treated))
)

summary(dhs_nomiss_fig3$diabetic_treated)

##controlled diabetic as subset of  diabetics
# Controlled = broad diabetic, on medication, and ex_diab_narrow_ind == 0.
# NOTE(review): as for diabetic_aware, NA results are left as NA.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_controlled = ifelse(
    ex_diab_broad_ind == 1 & ex_dia_med_ind == 1 & ex_diab_narrow_ind == 0,
    1, 0
  )
)
dhs_nomiss_fig3$diabetic_controlled <- factor(dhs_nomiss_fig3$diabetic_controlled,
                                              levels = c(0, 1))
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_controlled_dbl = as.numeric(as.character(diabetic_controlled))
)

summary(dhs_nomiss_fig3$diabetic_controlled)


################## Zones as per: https://en.wikipedia.org/wiki/Administrative_divisions_of_India
# Assigns each row a zone factor from ex_state_ind; states not listed get NA.
# Delhi and the Andaman and Nicobar Islands are matched under both spellings
# that occur in this file ("NCT of Delhi"/"Delhi", "Andaman and Nicobar"/
# "Andaman and Nicobar Islands"): the recode and exclusion steps in this
# section use the other spelling than the original zone lookup did, so one of
# the two would otherwise silently miss those rows.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  zone = as.factor(case_when(
    # Northern
    ex_state_ind %in% c("Chandigarh", "NCT of Delhi", "Delhi", "Haryana",
                        "Himachal Pradesh", "Punjab", "Rajasthan",
                        "Jammu and Kashmir") ~ "North",
    # Northeastern
    ex_state_ind %in% c("Assam", "Arunachal Pradesh", "Manipur", "Meghalaya",
                        "Mizoram", "Nagaland", "Sikkim", "Tripura") ~ "Northeast",
    # Central
    ex_state_ind %in% c("Chhattisgarh", "Madhya Pradesh", "Uttarakhand",
                        "Uttar Pradesh") ~ "Central",
    # Eastern
    ex_state_ind %in% c("Bihar", "Jharkhand", "Odisha", "West Bengal") ~ "East",
    # Western
    ex_state_ind %in% c("Daman and Diu", "Goa", "Maharashtra",
                        "Gujarat") ~ "West",
    # Southern
    ex_state_ind %in% c("Andaman and Nicobar", "Andaman and Nicobar Islands",
                        "Andhra Pradesh", "Karnataka", "Kerala", "Puducherry",
                        "Tamil Nadu", "Telangana") ~ "South",
    TRUE ~ NA_character_
  ))
)





####### Adding state labels
# state_lab is ex_state_ind with full state names replaced by two-letter
# codes. The lookup is written as a named vector (name = new code, value =
# existing level) and spliced into fct_recode(); levels not listed keep their
# original name.
dhs_nomiss_fig3 <- dhs_nomiss_fig3 %>%
  mutate(state_lab = fct_recode(ex_state_ind, !!!c(
    HP = "Himachal Pradesh", PB = "Punjab", CH = "Chandigarh",
    HR = "Haryana", DL = "Delhi", SK = "Sikkim",
    DD = "Daman and Diu", AR = "Arunachal Pradesh", NL = "Nagaland",
    MN = "Manipur", MZ = "Mizoram", TR = "Tripura",
    ML = "Meghalaya", WB = "West Bengal", MH = "Maharashtra",
    AP = "Andhra Pradesh", KA = "Karnataka", GA = "Goa",
    KL = "Kerala", PY = "Puducherry", TN = "Tamil Nadu",
    AN = "Andaman and Nicobar Islands", TS = "Telangana", UK = "Uttarakhand",
    RJ = "Rajasthan", UP = "Uttar Pradesh", BR = "Bihar",
    AS = "Assam", JK = "Jammu and Kashmir", GJ = "Gujarat",
    JH = "Jharkhand", OD = "Odisha", CT = "Chhattisgarh",
    MP = "Madhya Pradesh"
  )))



# Numeric copy of the urban column (level codes if `urban` is a factor).
dhs_nomiss_fig3 <- mutate(dhs_nomiss_fig3,
                          urban_dbl = as.numeric(urban))

#install.packages("spatstat")

# Drop the territories that are excluded from the district figure.
# Delhi and the Andaman and Nicobar Islands are listed under both spellings
# used in this file, because the zone lookup above spells them differently
# from the original exclusion list and only one spelling can match the data.
# Rows with a missing ex_state_ind are dropped as well, which is what the
# former chain of `!=` filters did implicitly.
dhs_nomiss_fig3 <- dplyr::filter(
  dhs_nomiss_fig3,
  !is.na(ex_state_ind),
  !(ex_state_ind %in% c(
    "Puducherry",
    "Daman and Diu",
    "Delhi", "NCT of Delhi",
    "Dadra and Nagar Haveli",
    "Andaman and Nicobar Islands", "Andaman and Nicobar",
    "Lakshadweep",
    "Chandigarh"
  ))
)

#####No rural urban

library(spatstat)
# One row per district (d_id) with survey-weighted estimates, all in percent:
#   diab           - share with broad diabetes in the district
#   awaremean      - aware share, expressed relative to diab
#   treatedmean    - treated share, expressed relative to diab
#   controlledmean - controlled share, expressed relative to diab
# Districts where diab is 0 yield NaN for the three relative shares (0/0).
# zone is taken from the first row of each district.
statemean.dat <- dhs_nomiss_fig3 %>%
  group_by(d_id) %>%
  mutate(
    diab = weighted.mean(ex_diab_broad_ind, sworld_weight_india, na.rm = TRUE) * 100,
    awaremean = weighted.mean(diabetic_aware_dbl, sworld_weight_india, na.rm = TRUE) * 100 / diab * 100,
    treatedmean = weighted.mean(diabetic_treated_dbl, sworld_weight_india, na.rm = TRUE) * 100 / diab * 100,
    controlledmean = weighted.mean(diabetic_controlled_dbl, sworld_weight_india, na.rm = TRUE) * 100 / diab * 100
  ) %>%
  ungroup() %>%
  # keep the first row of every district, in original row order
  distinct(d_id, .keep_all = TRUE) %>%
  dplyr::select(d_id, zone, diab, awaremean, treatedmean, controlledmean)

# Every age group in this chunk writes to the same legacy file name, so only
# the last group written survives there. The age-specific copy keeps this
# group's estimates; the legacy write is retained for anything that reads it.
write.csv(statemean.dat, "Colored reg fig estimates with treated districts age 15-29.csv")
write.csv(statemean.dat, "Colored reg fig estimates with treated districts.csv")
#install.packages("ggplot2")
#install.packages("ggrepel")
library(ggplot2)
library(ggrepel)

#####Aware / Treated / Controlled (districts, age 15-29)

# Builds one district-level scatter plot of diabetes prevalence (y = diab)
# against a cascade indicator (x), coloured/shaped by zone, with a black
# straight-line fit drawn on top of the points.
#   data    - data frame with columns diab, zone and the column named in x_var
#   x_var   - name of the x column, as a string
#   x_label - x-axis title
# Returns the ggplot object.
# The axis ranges are set in coord_fixed() rather than through `limits=` on
# the scales: scale limits discard out-of-range rows before geom_smooth()
# fits, which could make the drawn line disagree with the lm() summaries
# printed at the end of this section. Coord limits only zoom the panel.
plot_district_cascade <- function(data, x_var, x_label) {
  ggplot(data, aes(x = .data[[x_var]], y = diab)) +
    geom_jitter(aes(color = zone, shape = zone), size = 2.5) +
    # method "lm"; the former mix of 'lm' and 'glm' gave the same Gaussian fit
    geom_smooth(method = "lm", se = FALSE, color = "black") +
    theme_classic() +
    labs(x = x_label,
         y = " Diabetes prevalence, in %",
         fill = "") +
    theme(axis.text = element_text(size = 20),
          axis.title = element_text(size = 22, face = "bold"),
          legend.text = element_text(size = 20),
          legend.title = element_blank(),
          axis.title.x = element_text(margin = margin(t = 20)),
          axis.title.y = element_text(margin = margin(r = 20)),
          strip.text.x = element_text(size = 22, face = "bold"),
          strip.background = element_blank(),
          panel.spacing = unit(2, "lines")) +
    scale_color_brewer(palette = "Dark2") +
    scale_y_continuous(breaks = seq(0, 30, by = 5)) +
    scale_x_continuous(breaks = seq(0, 100, by = 20)) +
    coord_fixed(ratio = 100 / 30, xlim = c(-2, 102), ylim = c(0, 30),
                expand = FALSE)
}

#####Aware
stateawarefig <- plot_district_cascade(statemean.dat, "awaremean", "Aware, in %")
stateawarefig

#######Treated
stateawarefig <- plot_district_cascade(statemean.dat, "treatedmean", "Treated, in %")
stateawarefig

#####Controlled
stateawarefig <- plot_district_cascade(statemean.dat, "controlledmean", "Controlled, in %")
stateawarefig


# Regression summaries matching the three fitted lines above.
summary(lm(formula = diab ~ awaremean, data = statemean.dat))
summary(lm(formula = diab ~ treatedmean, data = statemean.dat))
summary(lm(formula = diab ~ controlledmean, data = statemean.dat))



#####Age 30-39 years

# Respondents aged 30 to 39 from the (district-filtered) dhs_nomiss.
dhs_nomiss_3039 <- dplyr::filter(dhs_nomiss, age > 29 & age < 40)

#####SPECIAL DIABETES DEFINITION
# Broad definition used for this figure: hbg12 == 1 OR ex_diab_narrow_ind == 1.
# Rows where that condition evaluates to NA are counted as non-diabetic (0).
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_3039,
  ex_diab_broad_ind = ifelse(hbg12 == 1 | ex_diab_narrow_ind == 1, 1, 0),
  ex_diab_broad_ind = replace(ex_diab_broad_ind, is.na(ex_diab_broad_ind), 0),
  ex_diab_broad_ind_dbl = as.numeric(ex_diab_broad_ind)
)

# Each cascade indicator below is stored twice: as a factor (for summary()
# counts) and as a 0/1 double (*_dbl, used in the weighted means).
# The factor gets explicit levels c(0, 1) and the double is recovered through
# as.character(): as.numeric() on a factor returns level codes, so the former
# `as.numeric(as.factor(x)) - 1` turned an indicator whose only observed value
# was 1 into all zeros.

##aware diabetic as subset of diabetics
# NOTE(review): NA results (broad == 1 with hbg12 missing) are NOT recoded to
# 0 here, unlike diabetic_treated below; they are dropped by na.rm = TRUE in
# the district means. Confirm this asymmetry is intended.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_aware = ifelse(ex_diab_broad_ind == 1 & hbg12 == 1, 1, 0)
)
dhs_nomiss_fig3$diabetic_aware <- factor(dhs_nomiss_fig3$diabetic_aware,
                                         levels = c(0, 1))
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_aware_dbl = as.numeric(as.character(diabetic_aware))
)

summary(dhs_nomiss_fig3$diabetic_aware)

##treated diabetic as subset diabetics
# Missing results are recoded to 0 (not treated) before the factor is built.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_treated = ifelse(ex_diab_broad_ind == 1 & ex_dia_med_ind == 1, 1, 0),
  diabetic_treated = replace(diabetic_treated, is.na(diabetic_treated), 0)
)
dhs_nomiss_fig3$diabetic_treated <- factor(dhs_nomiss_fig3$diabetic_treated,
                                           levels = c(0, 1))
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_treated_dbl = as.numeric(as.character(diabetic_treated))
)

summary(dhs_nomiss_fig3$diabetic_treated)

##controlled diabetic as subset of  diabetics
# Controlled = broad diabetic, on medication, and ex_diab_narrow_ind == 0.
# NOTE(review): as for diabetic_aware, NA results are left as NA.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_controlled = ifelse(
    ex_diab_broad_ind == 1 & ex_dia_med_ind == 1 & ex_diab_narrow_ind == 0,
    1, 0
  )
)
dhs_nomiss_fig3$diabetic_controlled <- factor(dhs_nomiss_fig3$diabetic_controlled,
                                              levels = c(0, 1))
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_controlled_dbl = as.numeric(as.character(diabetic_controlled))
)

summary(dhs_nomiss_fig3$diabetic_controlled)


################## Zones as per: https://en.wikipedia.org/wiki/Administrative_divisions_of_India
# Assigns each row a zone factor from ex_state_ind; states not listed get NA.
# Delhi and the Andaman and Nicobar Islands are matched under both spellings
# that occur in this file ("NCT of Delhi"/"Delhi", "Andaman and Nicobar"/
# "Andaman and Nicobar Islands"): the recode and exclusion steps in this
# section use the other spelling than the original zone lookup did, so one of
# the two would otherwise silently miss those rows.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  zone = as.factor(case_when(
    # Northern
    ex_state_ind %in% c("Chandigarh", "NCT of Delhi", "Delhi", "Haryana",
                        "Himachal Pradesh", "Punjab", "Rajasthan",
                        "Jammu and Kashmir") ~ "North",
    # Northeastern
    ex_state_ind %in% c("Assam", "Arunachal Pradesh", "Manipur", "Meghalaya",
                        "Mizoram", "Nagaland", "Sikkim", "Tripura") ~ "Northeast",
    # Central
    ex_state_ind %in% c("Chhattisgarh", "Madhya Pradesh", "Uttarakhand",
                        "Uttar Pradesh") ~ "Central",
    # Eastern
    ex_state_ind %in% c("Bihar", "Jharkhand", "Odisha", "West Bengal") ~ "East",
    # Western
    ex_state_ind %in% c("Daman and Diu", "Goa", "Maharashtra",
                        "Gujarat") ~ "West",
    # Southern
    ex_state_ind %in% c("Andaman and Nicobar", "Andaman and Nicobar Islands",
                        "Andhra Pradesh", "Karnataka", "Kerala", "Puducherry",
                        "Tamil Nadu", "Telangana") ~ "South",
    TRUE ~ NA_character_
  ))
)





####### Adding state labels
# state_lab is ex_state_ind with full state names replaced by two-letter
# codes. The lookup is written as a named vector (name = new code, value =
# existing level) and spliced into fct_recode(); levels not listed keep their
# original name.
dhs_nomiss_fig3 <- dhs_nomiss_fig3 %>%
  mutate(state_lab = fct_recode(ex_state_ind, !!!c(
    HP = "Himachal Pradesh", PB = "Punjab", CH = "Chandigarh",
    HR = "Haryana", DL = "Delhi", SK = "Sikkim",
    DD = "Daman and Diu", AR = "Arunachal Pradesh", NL = "Nagaland",
    MN = "Manipur", MZ = "Mizoram", TR = "Tripura",
    ML = "Meghalaya", WB = "West Bengal", MH = "Maharashtra",
    AP = "Andhra Pradesh", KA = "Karnataka", GA = "Goa",
    KL = "Kerala", PY = "Puducherry", TN = "Tamil Nadu",
    AN = "Andaman and Nicobar Islands", TS = "Telangana", UK = "Uttarakhand",
    RJ = "Rajasthan", UP = "Uttar Pradesh", BR = "Bihar",
    AS = "Assam", JK = "Jammu and Kashmir", GJ = "Gujarat",
    JH = "Jharkhand", OD = "Odisha", CT = "Chhattisgarh",
    MP = "Madhya Pradesh"
  )))



# Numeric copy of the urban column (level codes if `urban` is a factor).
dhs_nomiss_fig3 <- mutate(dhs_nomiss_fig3,
                          urban_dbl = as.numeric(urban))

#install.packages("spatstat")

# Drop the territories that are excluded from the district figure.
# Delhi and the Andaman and Nicobar Islands are listed under both spellings
# used in this file, because the zone lookup above spells them differently
# from the original exclusion list and only one spelling can match the data.
# Rows with a missing ex_state_ind are dropped as well, which is what the
# former chain of `!=` filters did implicitly.
dhs_nomiss_fig3 <- dplyr::filter(
  dhs_nomiss_fig3,
  !is.na(ex_state_ind),
  !(ex_state_ind %in% c(
    "Puducherry",
    "Daman and Diu",
    "Delhi", "NCT of Delhi",
    "Dadra and Nagar Haveli",
    "Andaman and Nicobar Islands", "Andaman and Nicobar",
    "Lakshadweep",
    "Chandigarh"
  ))
)

#####No rural urban

library(spatstat)
# One row per district (d_id) with survey-weighted estimates, all in percent:
#   diab           - share with broad diabetes in the district
#   awaremean      - aware share, expressed relative to diab
#   treatedmean    - treated share, expressed relative to diab
#   controlledmean - controlled share, expressed relative to diab
# Districts where diab is 0 yield NaN for the three relative shares (0/0).
# zone is taken from the first row of each district.
statemean.dat <- dhs_nomiss_fig3 %>%
  group_by(d_id) %>%
  mutate(
    diab = weighted.mean(ex_diab_broad_ind, sworld_weight_india, na.rm = TRUE) * 100,
    awaremean = weighted.mean(diabetic_aware_dbl, sworld_weight_india, na.rm = TRUE) * 100 / diab * 100,
    treatedmean = weighted.mean(diabetic_treated_dbl, sworld_weight_india, na.rm = TRUE) * 100 / diab * 100,
    controlledmean = weighted.mean(diabetic_controlled_dbl, sworld_weight_india, na.rm = TRUE) * 100 / diab * 100
  ) %>%
  ungroup() %>%
  # keep the first row of every district, in original row order
  distinct(d_id, .keep_all = TRUE) %>%
  dplyr::select(d_id, zone, diab, awaremean, treatedmean, controlledmean)

# Every age group in this chunk writes to the same legacy file name, so only
# the last group written survives there. The age-specific copy keeps this
# group's estimates; the legacy write is retained for anything that reads it.
write.csv(statemean.dat, "Colored reg fig estimates with treated districts age 30-39.csv")
write.csv(statemean.dat, "Colored reg fig estimates with treated districts.csv")
#install.packages("ggplot2")
#install.packages("ggrepel")
library(ggplot2)
library(ggrepel)

#####Aware / Treated / Controlled (districts, age 30-39)

# Builds one district-level scatter plot of diabetes prevalence (y = diab)
# against a cascade indicator (x), coloured/shaped by zone, with a black
# straight-line fit drawn on top of the points.
#   data    - data frame with columns diab, zone and the column named in x_var
#   x_var   - name of the x column, as a string
#   x_label - x-axis title
# Returns the ggplot object.
# The axis ranges are set in coord_fixed() rather than through `limits=` on
# the scales: scale limits discard out-of-range rows before geom_smooth()
# fits, which could make the drawn line disagree with the lm() summaries
# printed at the end of this section. Coord limits only zoom the panel.
plot_district_cascade <- function(data, x_var, x_label) {
  ggplot(data, aes(x = .data[[x_var]], y = diab)) +
    geom_jitter(aes(color = zone, shape = zone), size = 2.5) +
    # method "lm"; the former mix of 'lm' and 'glm' gave the same Gaussian fit
    geom_smooth(method = "lm", se = FALSE, color = "black") +
    theme_classic() +
    labs(x = x_label,
         y = " Diabetes prevalence, in %",
         fill = "") +
    theme(axis.text = element_text(size = 20),
          axis.title = element_text(size = 22, face = "bold"),
          legend.text = element_text(size = 20),
          legend.title = element_blank(),
          axis.title.x = element_text(margin = margin(t = 20)),
          axis.title.y = element_text(margin = margin(r = 20)),
          strip.text.x = element_text(size = 22, face = "bold"),
          strip.background = element_blank(),
          panel.spacing = unit(2, "lines")) +
    scale_color_brewer(palette = "Dark2") +
    scale_y_continuous(breaks = seq(0, 30, by = 5)) +
    scale_x_continuous(breaks = seq(0, 100, by = 20)) +
    coord_fixed(ratio = 100 / 30, xlim = c(-2, 102), ylim = c(0, 30),
                expand = FALSE)
}

#####Aware
stateawarefig <- plot_district_cascade(statemean.dat, "awaremean", "Aware, in %")
stateawarefig

#######Treated
stateawarefig <- plot_district_cascade(statemean.dat, "treatedmean", "Treated, in %")
stateawarefig

#####Controlled
stateawarefig <- plot_district_cascade(statemean.dat, "controlledmean", "Controlled, in %")
stateawarefig


# Regression summaries matching the three fitted lines above.
summary(lm(formula = diab ~ awaremean, data = statemean.dat))
summary(lm(formula = diab ~ treatedmean, data = statemean.dat))
summary(lm(formula = diab ~ controlledmean, data = statemean.dat))


#####Age 40-49 years

# Respondents aged 40 to 49 from the (district-filtered) dhs_nomiss.
dhs_nomiss_4049 <- dplyr::filter(dhs_nomiss, age > 39 & age < 50)

#####SPECIAL DIABETES DEFINITION
# Broad definition used for this figure: hbg12 == 1 OR ex_diab_narrow_ind == 1.
# Rows where that condition evaluates to NA are counted as non-diabetic (0).
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_4049,
  ex_diab_broad_ind = ifelse(hbg12 == 1 | ex_diab_narrow_ind == 1, 1, 0),
  ex_diab_broad_ind = replace(ex_diab_broad_ind, is.na(ex_diab_broad_ind), 0),
  ex_diab_broad_ind_dbl = as.numeric(ex_diab_broad_ind)
)

# Each cascade indicator below is stored twice: as a factor (for summary()
# counts) and as a 0/1 double (*_dbl, used in the weighted means).
# The factor gets explicit levels c(0, 1) and the double is recovered through
# as.character(): as.numeric() on a factor returns level codes, so the former
# `as.numeric(as.factor(x)) - 1` turned an indicator whose only observed value
# was 1 into all zeros.

##aware diabetic as subset of diabetics
# NOTE(review): NA results (broad == 1 with hbg12 missing) are NOT recoded to
# 0 here, unlike diabetic_treated below; they are dropped by na.rm = TRUE in
# the district means. Confirm this asymmetry is intended.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_aware = ifelse(ex_diab_broad_ind == 1 & hbg12 == 1, 1, 0)
)
dhs_nomiss_fig3$diabetic_aware <- factor(dhs_nomiss_fig3$diabetic_aware,
                                         levels = c(0, 1))
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_aware_dbl = as.numeric(as.character(diabetic_aware))
)

summary(dhs_nomiss_fig3$diabetic_aware)

##treated diabetic as subset diabetics
# Missing results are recoded to 0 (not treated) before the factor is built.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_treated = ifelse(ex_diab_broad_ind == 1 & ex_dia_med_ind == 1, 1, 0),
  diabetic_treated = replace(diabetic_treated, is.na(diabetic_treated), 0)
)
dhs_nomiss_fig3$diabetic_treated <- factor(dhs_nomiss_fig3$diabetic_treated,
                                           levels = c(0, 1))
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_treated_dbl = as.numeric(as.character(diabetic_treated))
)

summary(dhs_nomiss_fig3$diabetic_treated)

##controlled diabetic as subset of  diabetics
# Controlled = broad diabetic, on medication, and ex_diab_narrow_ind == 0.
# NOTE(review): as for diabetic_aware, NA results are left as NA.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_controlled = ifelse(
    ex_diab_broad_ind == 1 & ex_dia_med_ind == 1 & ex_diab_narrow_ind == 0,
    1, 0
  )
)
dhs_nomiss_fig3$diabetic_controlled <- factor(dhs_nomiss_fig3$diabetic_controlled,
                                              levels = c(0, 1))
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  diabetic_controlled_dbl = as.numeric(as.character(diabetic_controlled))
)

summary(dhs_nomiss_fig3$diabetic_controlled)


################## Zones as per: https://en.wikipedia.org/wiki/Administrative_divisions_of_India
# Assigns each row a zone factor from ex_state_ind; states not listed get NA.
# Delhi and the Andaman and Nicobar Islands are matched under both spellings
# that occur in this file ("NCT of Delhi"/"Delhi", "Andaman and Nicobar"/
# "Andaman and Nicobar Islands"): the recode and exclusion steps in this
# section use the other spelling than the original zone lookup did, so one of
# the two would otherwise silently miss those rows.
dhs_nomiss_fig3 <- mutate(
  dhs_nomiss_fig3,
  zone = as.factor(case_when(
    # Northern
    ex_state_ind %in% c("Chandigarh", "NCT of Delhi", "Delhi", "Haryana",
                        "Himachal Pradesh", "Punjab", "Rajasthan",
                        "Jammu and Kashmir") ~ "North",
    # Northeastern
    ex_state_ind %in% c("Assam", "Arunachal Pradesh", "Manipur", "Meghalaya",
                        "Mizoram", "Nagaland", "Sikkim", "Tripura") ~ "Northeast",
    # Central
    ex_state_ind %in% c("Chhattisgarh", "Madhya Pradesh", "Uttarakhand",
                        "Uttar Pradesh") ~ "Central",
    # Eastern
    ex_state_ind %in% c("Bihar", "Jharkhand", "Odisha", "West Bengal") ~ "East",
    # Western
    ex_state_ind %in% c("Daman and Diu", "Goa", "Maharashtra",
                        "Gujarat") ~ "West",
    # Southern
    ex_state_ind %in% c("Andaman and Nicobar", "Andaman and Nicobar Islands",
                        "Andhra Pradesh", "Karnataka", "Kerala", "Puducherry",
                        "Tamil Nadu", "Telangana") ~ "South",
    TRUE ~ NA_character_
  ))
)





####### Adding state labels
# state_lab is ex_state_ind with full state names replaced by two-letter
# codes. The lookup is written as a named vector (name = new code, value =
# existing level) and spliced into fct_recode(); levels not listed keep their
# original name.
dhs_nomiss_fig3 <- dhs_nomiss_fig3 %>%
  mutate(state_lab = fct_recode(ex_state_ind, !!!c(
    HP = "Himachal Pradesh", PB = "Punjab", CH = "Chandigarh",
    HR = "Haryana", DL = "Delhi", SK = "Sikkim",
    DD = "Daman and Diu", AR = "Arunachal Pradesh", NL = "Nagaland",
    MN = "Manipur", MZ = "Mizoram", TR = "Tripura",
    ML = "Meghalaya", WB = "West Bengal", MH = "Maharashtra",
    AP = "Andhra Pradesh", KA = "Karnataka", GA = "Goa",
    KL = "Kerala", PY = "Puducherry", TN = "Tamil Nadu",
    AN = "Andaman and Nicobar Islands", TS = "Telangana", UK = "Uttarakhand",
    RJ = "Rajasthan", UP = "Uttar Pradesh", BR = "Bihar",
    AS = "Assam", JK = "Jammu and Kashmir", GJ = "Gujarat",
    JH = "Jharkhand", OD = "Odisha", CT = "Chhattisgarh",
    MP = "Madhya Pradesh"
  )))



# Numeric copy of the urban column (level codes if `urban` is a factor).
dhs_nomiss_fig3 <- mutate(dhs_nomiss_fig3,
                          urban_dbl = as.numeric(urban))

#install.packages("spatstat")

# Drop the territories that are excluded from the district figure.
# Delhi and the Andaman and Nicobar Islands are listed under both spellings
# used in this file, because the zone lookup above spells them differently
# from the original exclusion list and only one spelling can match the data.
# Rows with a missing ex_state_ind are dropped as well, which is what the
# former chain of `!=` filters did implicitly.
dhs_nomiss_fig3 <- dplyr::filter(
  dhs_nomiss_fig3,
  !is.na(ex_state_ind),
  !(ex_state_ind %in% c(
    "Puducherry",
    "Daman and Diu",
    "Delhi", "NCT of Delhi",
    "Dadra and Nagar Haveli",
    "Andaman and Nicobar Islands", "Andaman and Nicobar",
    "Lakshadweep",
    "Chandigarh"
  ))
)

#####No rural urban

library(spatstat)

# Survey-weighted estimates with one row per district (d_id):
#   diab           - diabetes prevalence, in %
#   awaremean      - weighted share aware, divided by prevalence, in %
#   treatedmean    - weighted share treated, divided by prevalence, in %
#   controlledmean - weighted share controlled, divided by prevalence, in %
# Each row of a district first receives the district-level value; afterwards
# only the first row per district is kept, so `zone` comes from that row.
# A district with zero prevalence yields 0/0 = NaN for the three shares.
statemean.dat <- dhs_nomiss_fig3 %>%
  group_by(d_id) %>%
  mutate(
    diab = weighted.mean(ex_diab_broad_ind, sworld_weight_india, na.rm = TRUE) * 100,
    awaremean = ((weighted.mean(diabetic_aware_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100,
    treatedmean = ((weighted.mean(diabetic_treated_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100,
    controlledmean = ((weighted.mean(diabetic_controlled_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100
  ) %>%
  ungroup() %>%
  distinct(d_id, .keep_all = TRUE) %>%
  dplyr::select(d_id, zone, diab, awaremean, treatedmean, controlledmean)

# NOTE(review): other chunks in this file write to this same file name, so
# whichever runs last overwrites the earlier output.
write.csv(statemean.dat, "Colored reg fig estimates with treated districts.csv")

# install.packages("ggplot2")
# install.packages("ggrepel")
library(ggplot2)
library(ggrepel)

#####Aware

# Scatter of district diabetes prevalence (y) against the share of diabetics
# who are aware (x), coloured and shaped by zone, with a straight-line fit.
# Note: the scale limits below censor out-of-range values before the smoother
# is fitted, so the drawn line only reflects districts inside the limits.
stateawarefig <- ggplot(statemean.dat, aes(x = awaremean, y = diab)) +
  geom_jitter(aes(colour = zone, shape = zone), size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  theme_classic() +
  labs(
    x = "Aware, in %",
    y = " Diabetes prevalence, in %",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    # legend.position = "bottom",
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_colour_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(0, 5, 10, 15, 20, 25, 30), limits = c(0, 30)) +
  scale_x_continuous(breaks = c(0, 20, 40, 60, 80, 100), limits = c(-2, 102)) +
  coord_fixed(ratio = 100 / 30, expand = FALSE)
stateawarefig


#######Treated

# Scatter of district diabetes prevalence (y) against the share of diabetics
# who are treated (x), coloured and shaped by zone. The object name
# `stateawarefig` is reused from the previous figure and overwritten here.
# The smoother uses glm() with its default family.
# Note: the scale limits below censor out-of-range values before the smoother
# is fitted.
stateawarefig <- ggplot(statemean.dat, aes(x = treatedmean, y = diab)) +
  geom_jitter(aes(colour = zone, shape = zone), size = 2.5) +
  geom_smooth(method = "glm", se = FALSE, colour = "black") +
  theme_classic() +
  labs(
    x = "Treated, in %",
    y = " Diabetes prevalence, in %",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    # legend.position = "bottom",
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_colour_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(0, 5, 10, 15, 20, 25, 30), limits = c(0, 30)) +
  scale_x_continuous(breaks = c(0, 20, 40, 60, 80, 100), limits = c(-2, 102)) +
  coord_fixed(ratio = 100 / 30, expand = FALSE)
stateawarefig


#####Controlled

# Scatter of district diabetes prevalence (y) against the share of diabetics
# who are controlled (x), coloured and shaped by zone, with a straight-line
# fit. The object name `stateawarefig` is reused and overwritten here.
# Note: the scale limits below censor out-of-range values before the smoother
# is fitted.
stateawarefig <- ggplot(statemean.dat, aes(x = controlledmean, y = diab)) +
  geom_jitter(aes(colour = zone, shape = zone), size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  theme_classic() +
  labs(
    x = "Controlled, in %",
    y = " Diabetes prevalence, in %",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    # legend.position = "bottom",
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_colour_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(0, 5, 10, 15, 20, 25, 30), limits = c(0, 30)) +
  scale_x_continuous(breaks = c(0, 20, 40, 60, 80, 100), limits = c(-2, 102)) +
  coord_fixed(ratio = 100 / 30, expand = FALSE)
stateawarefig


# Unweighted linear regressions of prevalence on each cascade share, fitted on
# all rows of statemean.dat (the plot scale limits do not apply here).
summary(lm(diab ~ awaremean, data = statemean.dat))
summary(lm(diab ~ treatedmean, data = statemean.dat))
summary(lm(diab ~ controlledmean, data = statemean.dat))




```












```{r figure 3blood glucose measurement only }


##############Regression figure low diab prevalence low outcomes DIABETES DEFINITION Blood glucose measurement only

######Create diabetes care indicators in dhs_nomiss


#####SPECIAL DIABETES DEFINITION

# Glucose-measurement-only definition: diabetic (1) when ex_diab_narrow_ind
# equals 1, otherwise 0; a missing result is recoded to 0.
# Note: this overwrites ex_diab_broad_ind in dhs_nomiss itself.
dhs_nomiss <- dhs_nomiss %>%
  mutate(ex_diab_broad_ind = ifelse(ex_diab_narrow_ind == 1, 1, 0))
dhs_nomiss$ex_diab_broad_ind[is.na(dhs_nomiss$ex_diab_broad_ind)] <- 0
dhs_nomiss <- dhs_nomiss %>%
  mutate(ex_diab_broad_ind_dbl = as.numeric(ex_diab_broad_ind))

## Aware: diabetic and hbg12 == 1.
# Stored both as a factor and as a numeric 0/1 column. The "- 1" relies on the
# factor having the two levels "0" and "1" in that order.
# NOTE(review): unlike the treated indicator, missing values are not recoded
# to 0 here -- confirm that is intended.
dhs_nomiss <- dhs_nomiss %>%
  mutate(diabetic_aware = ifelse(ex_diab_broad_ind == 1 & hbg12 == 1, 1, 0))
dhs_nomiss[["diabetic_aware"]] <- as.factor(dhs_nomiss[["diabetic_aware"]])
dhs_nomiss <- dhs_nomiss %>%
  mutate(diabetic_aware_dbl = as.numeric(diabetic_aware) - 1)

summary(dhs_nomiss$diabetic_aware)

## Treated: diabetic and ex_dia_med_ind == 1; missing values are recoded to 0.
dhs_nomiss <- dhs_nomiss %>%
  mutate(diabetic_treated = ifelse(ex_diab_broad_ind == 1 & ex_dia_med_ind == 1, 1, 0))
dhs_nomiss$diabetic_treated[is.na(dhs_nomiss$diabetic_treated)] <- 0
dhs_nomiss[["diabetic_treated"]] <- as.factor(dhs_nomiss[["diabetic_treated"]])
dhs_nomiss <- dhs_nomiss %>%
  mutate(diabetic_treated_dbl = as.numeric(diabetic_treated) - 1)

summary(dhs_nomiss$diabetic_treated)

## Controlled: diabetic, on medication, and ex_diab_narrow_ind == 0.
# Under the definition above ex_diab_broad_ind == 1 implies
# ex_diab_narrow_ind == 1, so this condition is never met and the indicator is
# 0 for every row in this chunk.
dhs_nomiss <- dhs_nomiss %>%
  mutate(
    diabetic_controlled = ifelse(
      ex_diab_broad_ind == 1 & ex_dia_med_ind == 1 & ex_diab_narrow_ind == 0,
      1,
      0
    )
  )
dhs_nomiss[["diabetic_controlled"]] <- as.factor(dhs_nomiss[["diabetic_controlled"]])
dhs_nomiss <- dhs_nomiss %>%
  mutate(diabetic_controlled_dbl = as.numeric(diabetic_controlled) - 1)

summary(dhs_nomiss$diabetic_controlled)


################## Zones as per: https://en.wikipedia.org/wiki/Administrative_divisions_of_India
# Map each state in ex_state_ind to a zone, stored as a factor.
# The recode and filter steps in this file spell two territories "Delhi" and
# "Andaman and Nicobar Islands", whereas this lookup previously only knew
# "NCT of Delhi" and "Andaman and Nicobar". Both spellings are accepted so the
# territory gets its zone whichever spelling the data uses.
# States matching no list (and rows with a missing state) get zone = NA.
dhs_nomiss <- mutate(
  dhs_nomiss,
  zone = as.factor(dplyr::case_when(
    # Northern
    ex_state_ind %in% c(
      "Chandigarh", "NCT of Delhi", "Delhi", "Haryana", "Himachal Pradesh",
      "Punjab", "Rajasthan", "Jammu and Kashmir"
    ) ~ "North",
    # Northeastern
    ex_state_ind %in% c(
      "Assam", "Arunachal Pradesh", "Manipur", "Meghalaya", "Mizoram",
      "Nagaland", "Sikkim", "Tripura"
    ) ~ "Northeast",
    # Central
    ex_state_ind %in% c(
      "Chhattisgarh", "Madhya Pradesh", "Uttarakhand", "Uttar Pradesh"
    ) ~ "Central",
    # Eastern
    ex_state_ind %in% c(
      "Bihar", "Jharkhand", "Odisha", "West Bengal"
    ) ~ "East",
    # Western
    ex_state_ind %in% c(
      "Daman and Diu", "Goa", "Maharashtra", "Gujarat"
    ) ~ "West",
    # Southern
    ex_state_ind %in% c(
      "Andaman and Nicobar", "Andaman and Nicobar Islands", "Andhra Pradesh",
      "Karnataka", "Kerala", "Puducherry", "Tamil Nadu", "Telangana"
    ) ~ "South",
    TRUE ~ NA_character_
  ))
)





####### Adding state labels
# Two-letter state labels: state_lab is ex_state_ind with each listed level
# renamed to its abbreviation (new name on the left, existing level on the
# right). Levels not listed here keep their original name.
dhs_nomiss <- dhs_nomiss %>%
  mutate(
    state_lab = fct_recode(
      ex_state_ind,
      HP = "Himachal Pradesh",
      PB = "Punjab",
      CH = "Chandigarh",
      HR = "Haryana",
      DL = "Delhi",
      SK = "Sikkim",
      DD = "Daman and Diu",
      AR = "Arunachal Pradesh",
      NL = "Nagaland",
      MN = "Manipur",
      MZ = "Mizoram",
      TR = "Tripura",
      ML = "Meghalaya",
      WB = "West Bengal",
      MH = "Maharashtra",
      AP = "Andhra Pradesh",
      KA = "Karnataka",
      GA = "Goa",
      KL = "Kerala",
      PY = "Puducherry",
      TN = "Tamil Nadu",
      AN = "Andaman and Nicobar Islands",
      TS = "Telangana",
      UK = "Uttarakhand",
      RJ = "Rajasthan",
      UP = "Uttar Pradesh",
      BR = "Bihar",
      AS = "Assam",
      JK = "Jammu and Kashmir",
      GJ = "Gujarat",
      JH = "Jharkhand",
      OD = "Odisha",
      CT = "Chhattisgarh",
      MP = "Madhya Pradesh"
    )
  )



# Numeric copy of the urban indicator.
# NOTE(review): if `urban` is stored as a factor, as.numeric() returns the
# integer level codes rather than the labels -- confirm the column type.
dhs_nomiss <- dhs_nomiss %>%
  mutate(urban_dbl = as.numeric(urban))

# install.packages("spatstat")

# Exclude the listed territories. This overwrites dhs_nomiss itself, so later
# chunks that start from dhs_nomiss see the reduced data. Rows with a missing
# state are dropped too, exactly as a chain of `!=` filters would drop them.
# NOTE(review): the zone lookup in this file refers to "NCT of Delhi" and
# "Andaman and Nicobar"; confirm which spellings occur in ex_state_ind.
dhs_nomiss <- dplyr::filter(
  dhs_nomiss,
  !is.na(ex_state_ind),
  !(ex_state_ind %in% c(
    "Puducherry",
    "Daman and Diu",
    "Delhi",
    "Dadra and Nagar Haveli",
    "Andaman and Nicobar Islands",
    "Lakshadweep",
    "Chandigarh"
  ))
)





#####No rural urban

library(spatstat)

# Survey-weighted estimates with one row per d_id:
#   prop_urban     - weighted mean of urban_dbl
#   diab           - diabetes prevalence, in %
#   awaremean      - weighted share aware, divided by prevalence, in %
#   treatedmean    - weighted share treated, divided by prevalence, in %
#   controlledmean - weighted share controlled, divided by prevalence, in %
# Only the first row per d_id is kept, so state_lab and zone come from that row.
# NOTE(review): grouping is by district (d_id) although the points are later
# labelled with state_lab and the CSV name mentions states -- confirm the
# intended unit of analysis.
statemean.dat <- dhs_nomiss %>%
  group_by(d_id) %>%
  mutate(
    prop_urban = weighted.mean(urban_dbl, sworld_weight_india, na.rm = TRUE),
    diab = weighted.mean(ex_diab_broad_ind_dbl, sworld_weight_india, na.rm = TRUE) * 100,
    awaremean = ((weighted.mean(diabetic_aware_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100,
    treatedmean = ((weighted.mean(diabetic_treated_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100,
    controlledmean = ((weighted.mean(diabetic_controlled_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100
  ) %>%
  ungroup() %>%
  distinct(d_id, .keep_all = TRUE) %>%
  dplyr::select(d_id, state_lab, zone, diab, awaremean, treatedmean, controlledmean, prop_urban)

# Export the estimates behind the labelled figures.
# NOTE(review): the file name contains the typo "treted"; it is left unchanged
# because other tooling may depend on the exact name.
write.csv(statemean.dat, "Colored reg fig estimates without treted states.csv")

# Packages are installed once, outside the document. An active
# install.packages() call here would hit the network on every knit and fails
# when no CRAN mirror is configured. Load the packages instead.
# install.packages("ggplot2")
# install.packages("ggrepel")
library(ggplot2)
library(ggrepel)

#####Aware
# Prevalence (y) against the share of diabetics who are aware (x): fitted line
# drawn first, then the zone-coded points, then repelled state_lab labels.
# The smoother uses glm() with its default family.
# Note: the scale limits below censor out-of-range values before the smoother
# is fitted.
stateawarefig <- ggplot(statemean.dat, aes(x = awaremean, y = diab)) +
  geom_smooth(method = "glm", se = FALSE, colour = "black") +
  geom_jitter(aes(colour = zone, shape = zone), size = 2.5) +
  geom_text_repel(aes(label = state_lab)) +
  theme_classic() +
  labs(
    x = "Aware, in %",
    y = " Diabetes prevalence, in %",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    # legend.position = "bottom",
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_colour_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) +
  scale_x_continuous(breaks = c(20, 40, 60), limits = c(0, 60)) +
  coord_fixed(ratio = 60 / 10, expand = FALSE)
stateawarefig

#######Treated

# Prevalence (y) against the share of diabetics who are treated (x): fitted
# line drawn first, then the zone-coded points, then repelled state_lab labels.
# The object name `stateawarefig` is reused and overwritten here.
# Note: the scale limits below censor out-of-range values before the smoother
# is fitted.
stateawarefig <- ggplot(statemean.dat, aes(x = treatedmean, y = diab)) +
  geom_smooth(method = "glm", se = FALSE, colour = "black") +
  geom_jitter(aes(colour = zone, shape = zone), size = 2.5) +
  geom_text_repel(aes(label = state_lab)) +
  theme_classic() +
  labs(
    x = "Treated, in %",
    y = " Diabetes prevalence, in %",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    # legend.position = "bottom",
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_colour_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) +
  scale_x_continuous(breaks = c(20, 40, 60), limits = c(0, 60)) +
  coord_fixed(ratio = 60 / 10, expand = FALSE)
stateawarefig


```

```{r figure 3 with treated and districts GENERAL DIABETES DEFINITION}

##############Regression figure low diab prevalence low outcomes DIABETES DEFINITION WITH TREATED AND DIStricts

######Create diabetes care indicators in dhs_nomiss


#####SPECIAL DIABETES DEFINITION

# General definition: diabetic (1) when hbg12 == 1 or ex_diab_narrow_ind == 1,
# otherwise 0; a missing result is recoded to 0.
# Note: this overwrites ex_diab_broad_ind in dhs_nomiss itself.
dhs_nomiss <- dhs_nomiss %>%
  mutate(ex_diab_broad_ind = ifelse(hbg12 == 1 | ex_diab_narrow_ind == 1, 1, 0))
dhs_nomiss$ex_diab_broad_ind[is.na(dhs_nomiss$ex_diab_broad_ind)] <- 0
dhs_nomiss <- dhs_nomiss %>%
  mutate(ex_diab_broad_ind_dbl = as.numeric(ex_diab_broad_ind))

## Aware: diabetic and hbg12 == 1.
# Stored both as a factor and as a numeric 0/1 column. The "- 1" relies on the
# factor having the two levels "0" and "1" in that order.
# NOTE(review): unlike the treated indicator, missing values are not recoded
# to 0 here -- confirm that is intended.
dhs_nomiss <- dhs_nomiss %>%
  mutate(diabetic_aware = ifelse(ex_diab_broad_ind == 1 & hbg12 == 1, 1, 0))
dhs_nomiss[["diabetic_aware"]] <- as.factor(dhs_nomiss[["diabetic_aware"]])
dhs_nomiss <- dhs_nomiss %>%
  mutate(diabetic_aware_dbl = as.numeric(diabetic_aware) - 1)

summary(dhs_nomiss$diabetic_aware)

## Treated: diabetic and ex_dia_med_ind == 1; missing values are recoded to 0.
dhs_nomiss <- dhs_nomiss %>%
  mutate(diabetic_treated = ifelse(ex_diab_broad_ind == 1 & ex_dia_med_ind == 1, 1, 0))
dhs_nomiss$diabetic_treated[is.na(dhs_nomiss$diabetic_treated)] <- 0
dhs_nomiss[["diabetic_treated"]] <- as.factor(dhs_nomiss[["diabetic_treated"]])
dhs_nomiss <- dhs_nomiss %>%
  mutate(diabetic_treated_dbl = as.numeric(diabetic_treated) - 1)

summary(dhs_nomiss$diabetic_treated)

## Controlled: diabetic, on medication, and ex_diab_narrow_ind == 0.
# NOTE(review): missing values are not recoded to 0 here, so rows with a
# missing component can stay NA -- confirm that is intended.
dhs_nomiss <- dhs_nomiss %>%
  mutate(
    diabetic_controlled = ifelse(
      ex_diab_broad_ind == 1 & ex_dia_med_ind == 1 & ex_diab_narrow_ind == 0,
      1,
      0
    )
  )
dhs_nomiss[["diabetic_controlled"]] <- as.factor(dhs_nomiss[["diabetic_controlled"]])
dhs_nomiss <- dhs_nomiss %>%
  mutate(diabetic_controlled_dbl = as.numeric(diabetic_controlled) - 1)

summary(dhs_nomiss$diabetic_controlled)

################## Zones as per: https://en.wikipedia.org/wiki/Administrative_divisions_of_India
# Map each state in ex_state_ind to a zone, stored as a factor.
# The filter step in this file spells two territories "Delhi" and
# "Andaman and Nicobar Islands", whereas this lookup previously only knew
# "NCT of Delhi" and "Andaman and Nicobar". Both spellings are accepted so the
# territory gets its zone whichever spelling the data uses.
# States matching no list (and rows with a missing state) get zone = NA.
dhs_nomiss <- mutate(
  dhs_nomiss,
  zone = as.factor(dplyr::case_when(
    # Northern
    ex_state_ind %in% c(
      "Chandigarh", "NCT of Delhi", "Delhi", "Haryana", "Himachal Pradesh",
      "Punjab", "Rajasthan", "Jammu and Kashmir"
    ) ~ "North",
    # Northeastern
    ex_state_ind %in% c(
      "Assam", "Arunachal Pradesh", "Manipur", "Meghalaya", "Mizoram",
      "Nagaland", "Sikkim", "Tripura"
    ) ~ "Northeast",
    # Central
    ex_state_ind %in% c(
      "Chhattisgarh", "Madhya Pradesh", "Uttarakhand", "Uttar Pradesh"
    ) ~ "Central",
    # Eastern
    ex_state_ind %in% c(
      "Bihar", "Jharkhand", "Odisha", "West Bengal"
    ) ~ "East",
    # Western
    ex_state_ind %in% c(
      "Daman and Diu", "Goa", "Maharashtra", "Gujarat"
    ) ~ "West",
    # Southern
    ex_state_ind %in% c(
      "Andaman and Nicobar", "Andaman and Nicobar Islands", "Andhra Pradesh",
      "Karnataka", "Kerala", "Puducherry", "Tamil Nadu", "Telangana"
    ) ~ "South",
    TRUE ~ NA_character_
  ))
)



# Exclude the listed territories. This overwrites dhs_nomiss itself. Rows with
# a missing state are dropped too, exactly as a chain of `!=` filters would
# drop them.
# NOTE(review): the zone lookup in this file refers to "NCT of Delhi" and
# "Andaman and Nicobar"; confirm which spellings occur in ex_state_ind.
dhs_nomiss <- dplyr::filter(
  dhs_nomiss,
  !is.na(ex_state_ind),
  !(ex_state_ind %in% c(
    "Puducherry",
    "Daman and Diu",
    "Delhi",
    "Dadra and Nagar Haveli",
    "Andaman and Nicobar Islands",
    "Lakshadweep",
    "Chandigarh"
  ))
)



#####No rural urban

library(spatstat)

# Survey-weighted estimates with one row per district (d_id):
#   diab           - diabetes prevalence, in %
#   awaremean      - weighted share aware, divided by prevalence, in %
#   treatedmean    - weighted share treated, divided by prevalence, in %
#   controlledmean - weighted share controlled, divided by prevalence, in %
# Only the first row per district is kept, so `zone` comes from that row.
# A district with zero prevalence yields 0/0 = NaN for the three shares.
statemean.dat <- dhs_nomiss %>%
  group_by(d_id) %>%
  mutate(
    diab = weighted.mean(ex_diab_broad_ind, sworld_weight_india, na.rm = TRUE) * 100,
    awaremean = ((weighted.mean(diabetic_aware_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100,
    treatedmean = ((weighted.mean(diabetic_treated_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100,
    controlledmean = ((weighted.mean(diabetic_controlled_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100
  ) %>%
  ungroup() %>%
  distinct(d_id, .keep_all = TRUE) %>%
  dplyr::select(d_id, zone, diab, awaremean, treatedmean, controlledmean)

# NOTE(review): other chunks in this file write to this same file name, so
# whichever runs last overwrites the earlier output.
write.csv(statemean.dat, "Colored reg fig estimates with treated districts.csv")

# install.packages("ggplot2")
# install.packages("ggrepel")
library(ggplot2)
library(ggrepel)

#####Aware

# Scatter of district diabetes prevalence (y) against the share of diabetics
# who are aware (x), coloured and shaped by zone, with a straight-line fit.
# Note: the scale limits below censor out-of-range values (including points
# jittered past the limits) before the smoother is fitted.
stateawarefig <- ggplot(statemean.dat, aes(x = awaremean, y = diab)) +
  geom_jitter(aes(colour = zone, shape = zone), size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  theme_classic() +
  labs(
    x = "Aware, in %",
    y = " Diabetes prevalence, in %",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    # legend.position = "bottom",
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_colour_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(0, 5, 10, 15), limits = c(0, 15)) +
  scale_x_continuous(breaks = c(0, 20, 40, 60, 80, 100), limits = c(-2, 100)) +
  coord_fixed(ratio = 100 / 15, expand = FALSE)
stateawarefig

#######Treated

# Scatter of district diabetes prevalence (y) against the share of diabetics
# who are treated (x), coloured and shaped by zone. The object name
# `stateawarefig` is reused and overwritten here.
# The smoother uses glm() with its default family.
# Note: the scale limits below censor out-of-range values before the smoother
# is fitted.
stateawarefig <- ggplot(statemean.dat, aes(x = treatedmean, y = diab)) +
  geom_jitter(aes(colour = zone, shape = zone), size = 2.5) +
  geom_smooth(method = "glm", se = FALSE, colour = "black") +
  theme_classic() +
  labs(
    x = "Treated, in %",
    y = " Diabetes prevalence, in %",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    # legend.position = "bottom",
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_colour_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(0, 5, 10, 15), limits = c(0, 15)) +
  scale_x_continuous(breaks = c(0, 20, 40, 60, 80, 100), limits = c(-2, 100)) +
  coord_fixed(ratio = 100 / 15, expand = FALSE)
stateawarefig

#####Controlled

# Scatter of district diabetes prevalence (y) against the share of diabetics
# who are controlled (x), coloured and shaped by zone, with a straight-line
# fit. The object name `stateawarefig` is reused and overwritten here.
# Note: the scale limits below censor out-of-range values before the smoother
# is fitted.
stateawarefig <- ggplot(statemean.dat, aes(x = controlledmean, y = diab)) +
  geom_jitter(aes(colour = zone, shape = zone), size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  theme_classic() +
  labs(
    x = "Controlled, in %",
    y = " Diabetes prevalence, in %",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    # legend.position = "bottom",
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_colour_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(0, 5, 10, 15), limits = c(0, 15)) +
  scale_x_continuous(breaks = c(0, 20, 40, 60, 80, 100), limits = c(-2, 100)) +
  coord_fixed(ratio = 100 / 15, expand = FALSE)
stateawarefig

# Unweighted linear regressions of prevalence on each cascade share, fitted on
# all rows of statemean.dat (the plot scale limits do not apply here).
summary(lm(diab ~ awaremean, data = statemean.dat))
summary(lm(diab ~ treatedmean, data = statemean.dat))
summary(lm(diab ~ controlledmean, data = statemean.dat))

```

```{r regression districts  ONLY BLOOD glucose measurement}


# Work on a copy so the indicators below do not overwrite those in dhs_nomiss.
dhs_nomiss_district_onlymeasure <- dhs_nomiss

# Optional restriction to districts with more than 10 diabetics (disabled):
#dhs_nomiss_district_onlymeasure <- dhs_nomiss_district_onlymeasure%>%
#  group_by(d_id)%>%
#  mutate(sumdiab = sum(ex_diab_broad_ind_dbl))

#dhs_nomiss_district_onlymeasure <- filter(dhs_nomiss_district_onlymeasure, sumdiab>10)

# Glucose-measurement-only definition: diabetic (1) when ex_diab_narrow_ind
# equals 1, otherwise 0; a missing result is recoded to 0.
dhs_nomiss_district_onlymeasure <- dhs_nomiss_district_onlymeasure %>%
  mutate(ex_diab_broad_ind = ifelse(ex_diab_narrow_ind == 1, 1, 0))
dhs_nomiss_district_onlymeasure$ex_diab_broad_ind[
  is.na(dhs_nomiss_district_onlymeasure$ex_diab_broad_ind)
] <- 0
dhs_nomiss_district_onlymeasure <- dhs_nomiss_district_onlymeasure %>%
  mutate(ex_diab_broad_ind_dbl = as.numeric(ex_diab_broad_ind))

## Aware: diabetic and hbg12 == 1.
# Stored both as a factor and as a numeric 0/1 column. The "- 1" relies on the
# factor having the two levels "0" and "1" in that order.
# NOTE(review): unlike the treated indicator, missing values are not recoded
# to 0 here -- confirm that is intended.
dhs_nomiss_district_onlymeasure <- dhs_nomiss_district_onlymeasure %>%
  mutate(diabetic_aware = ifelse(ex_diab_broad_ind == 1 & hbg12 == 1, 1, 0))
dhs_nomiss_district_onlymeasure[["diabetic_aware"]] <-
  as.factor(dhs_nomiss_district_onlymeasure[["diabetic_aware"]])
dhs_nomiss_district_onlymeasure <- dhs_nomiss_district_onlymeasure %>%
  mutate(diabetic_aware_dbl = as.numeric(diabetic_aware) - 1)

summary(dhs_nomiss_district_onlymeasure$diabetic_aware)

## Treated: diabetic and ex_dia_med_ind == 1; missing values are recoded to 0.
dhs_nomiss_district_onlymeasure <- dhs_nomiss_district_onlymeasure %>%
  mutate(diabetic_treated = ifelse(ex_diab_broad_ind == 1 & ex_dia_med_ind == 1, 1, 0))
dhs_nomiss_district_onlymeasure$diabetic_treated[
  is.na(dhs_nomiss_district_onlymeasure$diabetic_treated)
] <- 0
dhs_nomiss_district_onlymeasure[["diabetic_treated"]] <-
  as.factor(dhs_nomiss_district_onlymeasure[["diabetic_treated"]])
dhs_nomiss_district_onlymeasure <- dhs_nomiss_district_onlymeasure %>%
  mutate(diabetic_treated_dbl = as.numeric(diabetic_treated) - 1)

summary(dhs_nomiss_district_onlymeasure$diabetic_treated)

## Controlled: diabetic, on medication, and ex_diab_narrow_ind == 0.
# Under the definition above ex_diab_broad_ind == 1 implies
# ex_diab_narrow_ind == 1, so this condition is never met and the indicator is
# 0 for every row in this chunk.
dhs_nomiss_district_onlymeasure <- dhs_nomiss_district_onlymeasure %>%
  mutate(
    diabetic_controlled = ifelse(
      ex_diab_broad_ind == 1 & ex_dia_med_ind == 1 & ex_diab_narrow_ind == 0,
      1,
      0
    )
  )
dhs_nomiss_district_onlymeasure[["diabetic_controlled"]] <-
  as.factor(dhs_nomiss_district_onlymeasure[["diabetic_controlled"]])
dhs_nomiss_district_onlymeasure <- dhs_nomiss_district_onlymeasure %>%
  mutate(diabetic_controlled_dbl = as.numeric(diabetic_controlled) - 1)

summary(dhs_nomiss_district_onlymeasure$diabetic_controlled)

################## Zones as per: https://en.wikipedia.org/wiki/Administrative_divisions_of_India
# Map each state in ex_state_ind to a zone, stored as a factor.
# The filter step in this file spells two territories "Delhi" and
# "Andaman and Nicobar Islands", whereas this lookup previously only knew
# "NCT of Delhi" and "Andaman and Nicobar". Both spellings are accepted so the
# territory gets its zone whichever spelling the data uses.
# States matching no list (and rows with a missing state) get zone = NA.
dhs_nomiss_district_onlymeasure <- mutate(
  dhs_nomiss_district_onlymeasure,
  zone = as.factor(dplyr::case_when(
    # Northern
    ex_state_ind %in% c(
      "Chandigarh", "NCT of Delhi", "Delhi", "Haryana", "Himachal Pradesh",
      "Punjab", "Rajasthan", "Jammu and Kashmir"
    ) ~ "North",
    # Northeastern
    ex_state_ind %in% c(
      "Assam", "Arunachal Pradesh", "Manipur", "Meghalaya", "Mizoram",
      "Nagaland", "Sikkim", "Tripura"
    ) ~ "Northeast",
    # Central
    ex_state_ind %in% c(
      "Chhattisgarh", "Madhya Pradesh", "Uttarakhand", "Uttar Pradesh"
    ) ~ "Central",
    # Eastern
    ex_state_ind %in% c(
      "Bihar", "Jharkhand", "Odisha", "West Bengal"
    ) ~ "East",
    # Western
    ex_state_ind %in% c(
      "Daman and Diu", "Goa", "Maharashtra", "Gujarat"
    ) ~ "West",
    # Southern
    ex_state_ind %in% c(
      "Andaman and Nicobar", "Andaman and Nicobar Islands", "Andhra Pradesh",
      "Karnataka", "Kerala", "Puducherry", "Tamil Nadu", "Telangana"
    ) ~ "South",
    TRUE ~ NA_character_
  ))
)



# Exclude the listed territories. Rows with a missing state are dropped too,
# exactly as a chain of `!=` filters would drop them.
# NOTE(review): the zone lookup in this file refers to "NCT of Delhi" and
# "Andaman and Nicobar"; confirm which spellings occur in ex_state_ind.
dhs_nomiss_district_onlymeasure <- dplyr::filter(
  dhs_nomiss_district_onlymeasure,
  !is.na(ex_state_ind),
  !(ex_state_ind %in% c(
    "Puducherry",
    "Daman and Diu",
    "Delhi",
    "Dadra and Nagar Haveli",
    "Andaman and Nicobar Islands",
    "Lakshadweep",
    "Chandigarh"
  ))
)



#####No rural urban

library(spatstat)

# Survey-weighted estimates with one row per district (d_id):
#   diab           - diabetes prevalence, in %
#   awaremean      - weighted share aware, divided by prevalence, in %
#   treatedmean    - weighted share treated, divided by prevalence, in %
#   controlledmean - weighted share controlled, divided by prevalence, in %
# Only the first row per district is kept, so `zone` comes from that row.
# A district with zero prevalence yields 0/0 = NaN for the three shares.
statemean.dat <- dhs_nomiss_district_onlymeasure %>%
  group_by(d_id) %>%
  mutate(
    diab = weighted.mean(ex_diab_broad_ind, sworld_weight_india, na.rm = TRUE) * 100,
    awaremean = ((weighted.mean(diabetic_aware_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100,
    treatedmean = ((weighted.mean(diabetic_treated_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100,
    controlledmean = ((weighted.mean(diabetic_controlled_dbl, sworld_weight_india, na.rm = TRUE) * 100) / diab) * 100
  ) %>%
  ungroup() %>%
  distinct(d_id, .keep_all = TRUE) %>%
  dplyr::select(d_id, zone, diab, awaremean, treatedmean, controlledmean)

# NOTE(review): other chunks in this file write to this same file name, so
# whichever runs last overwrites the earlier output.
write.csv(statemean.dat, "Colored reg fig estimates with treated districts.csv")

# install.packages("ggplot2")
# install.packages("ggrepel")
library(ggplot2)
library(ggrepel)

#####Aware

# Scatter plot of district diabetes prevalence (y) against the share aware (x),
# with points coloured/shaped by zone and a single black linear fit.
# Point labels (geom_text_repel / geom_label_repel on state_lab) are disabled.
# NOTE(review): the limits set on the continuous scales censor out-of-range
# points before geom_smooth() fits its line, so the drawn fit can differ from
# the unrestricted lm() summarised later -- confirm this is intended.
stateawarefig <- ggplot(statemean.dat, aes(x = awaremean, y = diab)) +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  # x and y are inherited from the plot-level mapping
  geom_jitter(aes(color = zone, shape = zone), size = 2.5) +
  theme_classic() +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  labs(
    x = "Aware, in %",
    y = " Diabetes prevalence, in %",
    fill = ""
  ) +
  scale_color_brewer(palette = "Dark2") +
  scale_x_continuous(breaks = c(0, 20, 40, 60, 80, 100), limits = c(-2, 100)) +
  scale_y_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) +
  # 100 x-units per 10 y-units keeps the panel roughly square
  coord_fixed(ratio = 100 / 10, expand = FALSE)
stateawarefig
#######Treated

# Scatter plot of district diabetes prevalence (y) against the share treated
# (x), with points coloured/shaped by zone and a single black linear fit.
# Changes from the previous version:
#   * the y-axis breaks listed 10 twice; the duplicate is removed
#   * the smoother uses method = "lm", matching the "Aware" panel and the lm()
#     summary below (a default gaussian glm gives the same fitted line)
# NOTE(review): this assignment reuses the name `stateawarefig` and therefore
# overwrites the "Aware" figure; the name is kept so existing references keep
# working -- consider a dedicated name such as `statetreatedfig`.
# NOTE(review): the scale limits censor out-of-range points before the
# smoother is fitted -- confirm this is intended.
stateawarefig <- statemean.dat %>%
  ggplot(aes(y = diab, x = treatedmean)) +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  geom_jitter(aes(y = diab, x = treatedmean, color = zone, shape = zone), size = 2.5) +
  # Point labels (geom_text_repel / geom_label_repel on state_lab) are disabled.
  theme_classic() +
  labs(
    x = "Treated, in %",
    y = " Diabetes prevalence, in %",
    fill = ""
  ) +
  theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 22, face = "bold"),
    legend.text = element_text(size = 20),
    legend.title = element_blank(),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    strip.text.x = element_text(size = 22, face = "bold"),
    strip.background = element_blank(),
    panel.spacing = unit(2, "lines")
  ) +
  scale_color_brewer(palette = "Dark2") +
  scale_y_continuous(breaks = c(0, 5, 10), limits = c(0, 10)) +
  scale_x_continuous(breaks = c(0, 20, 40, 60, 80, 100), limits = c(-2, 100)) +
  # 100 x-units per 10 y-units keeps the panel roughly square
  coord_fixed(ratio = 100 / 10, expand = FALSE)
stateawarefig


# Unweighted district-level linear regressions of diabetes prevalence on the
# share aware and on the share treated (all rows of statemean.dat, without the
# axis limits applied in the figures).
summary(lm(diab ~ awaremean, data = statemean.dat))
summary(lm(diab ~ treatedmean, data = statemean.dat))

```


# Exploratory cell counts among urban respondents with sex == 0.
# Each statement prints the number of rows satisfying the condition; rows for
# which the condition is NA are not counted.
# NOTE(review): these statements come after the closing fence of the preceding
# code chunk, so knitr will not run them when the document is rendered.

# Full sample: age 25-29 with ex_diab_broad_ind == 1
with(dhs_nomiss, sum(
  age_grpOR == "25-29" & sex == 0 & urban_lab == "Urban" & ex_diab_broad_ind == 1,
  na.rm = TRUE
))

# Diabetic-only sample: age 15-19 and aware
with(dhs_nomiss_diabetic_only, sum(
  age_grpOR == "15-19" & sex == 0 & urban_lab == "Urban" & diabetic_aware == 1,
  na.rm = TRUE
))

# Diabetic-only sample: cell sizes by age group
with(dhs_nomiss_diabetic_only, sum(
  age_grpOR == "15-19" & sex == 0 & urban_lab == "Urban",
  na.rm = TRUE
))

with(dhs_nomiss_diabetic_only, sum(
  age_grpOR == "20-24" & sex == 0 & urban_lab == "Urban",
  na.rm = TRUE
))

with(dhs_nomiss_diabetic_only, sum(
  age_grpOR == "25-29" & sex == 0 & urban_lab == "Urban",
  na.rm = TRUE
))

with(dhs_nomiss_diabetic_only, sum(
  age_grpOR == "30-34" & sex == 0 & urban_lab == "Urban",
  na.rm = TRUE
))

# Diabetic-only sample: age 25-29 and aware
with(dhs_nomiss_diabetic_only, sum(
  age_grpOR == "25-29" & sex == 0 & urban_lab == "Urban" & diabetic_aware == 1,
  na.rm = TRUE
))

# Diabetic-only sample: counts by education category
with(dhs_nomiss_diabetic_only, sum(
  educatnames == "Primary school unfinished" & sex == 0 & urban_lab == "Urban" & diabetic_aware == 1,
  na.rm = TRUE
))

with(dhs_nomiss_diabetic_only, sum(
  educatnames == "Primary school unfinished" & sex == 0 & urban_lab == "Urban",
  na.rm = TRUE
))

with(dhs_nomiss_diabetic_only, sum(
  educatnames == "Primary school finished" & sex == 0 & urban_lab == "Urban" & diabetic_aware == 1,
  na.rm = TRUE
))

with(dhs_nomiss_diabetic_only, sum(
  educatnames == "Secondary school unfinished" & sex == 0 & urban_lab == "Urban" & diabetic_aware == 1,
  na.rm = TRUE
))

with(dhs_nomiss_diabetic_only, sum(
  educatnames == "Primary school finished" & sex == 0 & urban_lab == "Urban",
  na.rm = TRUE
))

#### educatnames: how many aware respondents in each (small) education group
with(dhs_nomiss_diabetic_only, sum(
  educatnames == "Primary school unfinished" & sex == 0 & urban_lab == "Urban" & diabetic_aware == 1,
  na.rm = TRUE
))

with(dhs_nomiss_diabetic_only, sum(
  educatnames == "Primary school finished" & sex == 0 & urban_lab == "Urban" & diabetic_aware == 1,
  na.rm = TRUE
))

with(dhs_nomiss_diabetic_only, sum(
  educatnames == "Secondary school unfinished" & sex == 0 & urban_lab == "Urban" & diabetic_aware == 1,
  na.rm = TRUE
))

with(dhs_nomiss_diabetic_only, sum(
  educatnames == "Secondary school or above" & sex == 0 & urban_lab == "Urban" & diabetic_aware == 1,
  na.rm = TRUE
))









#regression figure diabetes wealth#


# Complete-case subset on the listed regression variables.
# NOTE(review): the result of this filter is overwritten by the augment() call
# directly below, which is given the unfiltered `dhs_nomiss` -- confirm whether
# augment() was meant to receive this subset instead.
dhs_nomiss_regress <- filter(
  dhs_nomiss,
  !is.na(ex_diab_narrow_ind),
  !is.na(sex),
  !is.na(age_grpOR),
  !is.na(married),
  !is.na(wealth_quintile_rurb),
  !is.na(educat),
  !is.na(urban),
  !is.na(d_id)
)

# Attach response-scale fitted values and their standard errors from
# `multiv_feglm`, store them as prob_diab / se_diab, and drop the remaining
# diagnostic columns added by augment().
dhs_nomiss_regress <- augment(
  multiv_feglm,
  dhs_nomiss,
  type.predict = "response",
  se.fit = TRUE
) %>%
  mutate(
    prob_diab = .fitted,
    se_diab = .se.fit
  ) %>%
  dplyr::select(-c(.resid, .hat, .sigma, .cooksd, .std.resid, .fitted, .se.fit))


# Create dataset for plotting
# Cell means of the predicted probability and of its standard error for every
# age group x wealth quintile x urban/rural combination.
pmeans_diab <- aggregate(
  prob_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_regress,
  FUN = mean
)
p_standerr_diab <- aggregate(
  se_diab ~ age_grpOR + wealth_quintile_rurb_lab + urban_lab,
  data = dhs_nomiss_regress,
  FUN = mean
)

# Combine both summaries and add interval bounds of +/- 1.96 times the mean
# standard error around the mean predicted probability.
fig4.diab <- pmeans_diab %>%
  left_join(
    p_standerr_diab,
    by = c("age_grpOR", "wealth_quintile_rurb_lab", "urban_lab")
  ) %>%
  mutate(
    lowerci = prob_diab - 1.96 * se_diab,
    upperci = prob_diab + 1.96 * se_diab
  )

write.csv(fig4.diab, file = "regressionfigure_diab.csv")

# Keep interval bounds only for the rows labelled "Q (Richest)" or
# "Q (Poorest)"; every other wealth quintile gets NA bounds (so no error bar is
# drawn for it) and alphaindic = 0.
# NOTE(review): confirm that these two strings match the levels of
# wealth_quintile_rurb_lab exactly; otherwise all bounds become NA.
fig4.diab <- fig4.diab %>%
  mutate(
    show_ci = wealth_quintile_rurb_lab == "Q (Richest)" |
      wealth_quintile_rurb_lab == "Q (Poorest)",
    lowerci_alt = ifelse(show_ci, lowerci, NA),
    upperci_alt = ifelse(show_ci, upperci, NA),
    alphaindic = as.factor(ifelse(show_ci, 1, 0))
  ) %>%
  # show_ci is only a helper column for the three assignments above
  dplyr::select(-show_ci)


# Draw the actual figure
# Return the colour(s) `rgbcol` with the HSV value (brightness) component
# replaced by `v`, keeping hue and saturation.
#   rgbcol: colour specification(s) accepted by col2rgb()
#   v:      new value component(s), passed to hsv() as `v`
# Returns the character vector of colours produced by hsv().
brightness <- function(rgbcol, v) {
  hsv_rows <- rgb2hsv(col2rgb(rgbcol))
  hsv_args <- list(
    h = unname(hsv_rows["h", ]),
    s = unname(hsv_rows["s", ])
  )
  hsv_args$v <- v
  do.call(hsv, hsv_args)
}

# Predicted probability (in %) by age group, one panel per urban_lab value,
# with colour and point shape distinguishing the wealth quintiles. Error bars
# use the *_alt bounds, which are NA for the quintiles that should not show one.
# NOTE(review): `fun.y` is deprecated in newer ggplot2 releases in favour of
# `fun`; it is kept here so the code still runs on older installations.
diabpredfig <- ggplot(fig4.diab) +
  stat_summary(
    aes(
      x = age_grpOR,
      y = 100 * prob_diab,
      color = wealth_quintile_rurb_lab,
      shape = wealth_quintile_rurb_lab
    ),
    fun.y = "mean",
    geom = "point",
    size = 3.0
  ) +
  geom_errorbar(
    aes(
      x = age_grpOR,
      ymin = 100 * lowerci_alt,
      ymax = 100 * upperci_alt,
      color = wealth_quintile_rurb_lab
    ),
    width = 0.3,
    show.legend = FALSE
  ) +
  facet_wrap(~urban_lab) +
  coord_fixed(ratio = 8 / 30, ylim = c(0, 22), expand = TRUE) +
  theme_classic() +
  theme(
    axis.text.x = element_text(size = 18, angle = 45, hjust = 1, family = "Times"),
    axis.text.y = element_text(size = 18, family = "Times"),
    axis.title = element_text(size = 21, face = "bold", family = "Times"),
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 20)),
    legend.title = element_text(size = 18, family = "Times"),
    legend.text = element_text(size = 18, family = "Times"),
    strip.text = element_text(size = 18, family = "Times"),
    panel.spacing = unit(2.5, "lines"),
    plot.title = element_text(size = 24, face = "bold", family = "Times")
  ) +
  labs(
    x = "Age group, y",
    y = "Probability, %",
    fill = ""
  ) +
  # five shades of red, from value 0 up to value 1
  scale_colour_manual(
    name = "Household Wealth quintile",
    values = brightness("red", seq(0.0, 1.0, length.out = 5))
  ) +
  scale_shape_manual(
    name = "Household Wealth quintile",
    values = c(19, 25, 22, 23, 17)
  )
diabpredfig





